dect
/
linux-2.6
Archived
13
0
Fork 0
This repository has been archived on 2022-02-17. You can view files and clone it, but cannot push or open issues or pull requests.
linux-2.6/arch/powerpc/include/uapi/asm/kvm.h

417 lines
11 KiB
C
Raw Normal View History

/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
*/
#ifndef __LINUX_KVM_POWERPC_H
#define __LINUX_KVM_POWERPC_H
#include <linux/types.h>
/* Select powerpc specific features in <linux/kvm.h> */
#define __KVM_HAVE_SPAPR_TCE
KVM: PPC: Allow book3s_hv guests to use SMT processor modes This lifts the restriction that book3s_hv guests can only run one hardware thread per core, and allows them to use up to 4 threads per core on POWER7. The host still has to run single-threaded. This capability is advertised to qemu through a new KVM_CAP_PPC_SMT capability. The return value of the ioctl querying this capability is the number of vcpus per virtual CPU core (vcore), currently 4. To use this, the host kernel should be booted with all threads active, and then all the secondary threads should be offlined. This will put the secondary threads into nap mode. KVM will then wake them from nap mode and use them for running guest code (while they are still offline). To wake the secondary threads, we send them an IPI using a new xics_wake_cpu() function, implemented in arch/powerpc/sysdev/xics/icp-native.c. In other words, at this stage we assume that the platform has a XICS interrupt controller and we are using icp-native.c to drive it. Since the woken thread will need to acknowledge and clear the IPI, we also export the base physical address of the XICS registers using kvmppc_set_xics_phys() for use in the low-level KVM book3s code. When a vcpu is created, it is assigned to a virtual CPU core. The vcore number is obtained by dividing the vcpu number by the number of threads per core in the host. This number is exported to userspace via the KVM_CAP_PPC_SMT capability. If qemu wishes to run the guest in single-threaded mode, it should make all vcpu numbers be multiples of the number of threads per core. We distinguish three states of a vcpu: runnable (i.e., ready to execute the guest), blocked (that is, idle), and busy in host. We currently implement a policy that the vcore can run only when all its threads are runnable or blocked. This way, if a vcpu needs to execute elsewhere in the kernel or in qemu, it can do so without being starved of CPU by the other vcpus. When a vcore starts to run, it executes in the context of one of the vcpu threads. The other vcpu threads all go to sleep and stay asleep until something happens requiring the vcpu thread to return to qemu, or to wake up to run the vcore (this can happen when another vcpu thread goes from busy in host state to blocked). It can happen that a vcpu goes from blocked to runnable state (e.g. because of an interrupt), and the vcore it belongs to is already running. In that case it can start to run immediately as long as the none of the vcpus in the vcore have started to exit the guest. We send the next free thread in the vcore an IPI to get it to start to execute the guest. It synchronizes with the other threads via the vcore->entry_exit_count field to make sure that it doesn't go into the guest if the other vcpus are exiting by the time that it is ready to actually enter the guest. Note that there is no fixed relationship between the hardware thread number and the vcpu number. Hardware threads are assigned to vcpus as they become runnable, so we will always use the lower-numbered hardware threads in preference to higher-numbered threads if not all the vcpus in the vcore are runnable, regardless of which vcpus are runnable. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 00:23:08 +00:00
#define __KVM_HAVE_PPC_SMT
struct kvm_regs {
__u64 pc;
__u64 cr;
__u64 ctr;
__u64 lr;
__u64 xer;
__u64 msr;
__u64 srr0;
__u64 srr1;
__u64 pid;
__u64 sprg0;
__u64 sprg1;
__u64 sprg2;
__u64 sprg3;
__u64 sprg4;
__u64 sprg5;
__u64 sprg6;
__u64 sprg7;
__u64 gpr[32];
};
#define KVM_SREGS_E_IMPL_NONE 0
#define KVM_SREGS_E_IMPL_FSL 1
#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
/*
* Feature bits indicate which sections of the sregs struct are valid,
* both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
* corresponding to unset feature bits will not be modified. This allows
* restoring a checkpoint made without that feature, while keeping the
* default values of the new registers.
*
* KVM_SREGS_E_BASE contains:
* CSRR0/1 (refers to SRR2/3 on 40x)
* ESR
* DEAR
* MCSR
* TSR
* TCR
* DEC
* TB
* VRSAVE (USPRG0)
*/
#define KVM_SREGS_E_BASE (1 << 0)
/*
* KVM_SREGS_E_ARCH206 contains:
*
* PIR
* MCSRR0/1
* DECAR
* IVPR
*/
#define KVM_SREGS_E_ARCH206 (1 << 1)
/*
* Contains EPCR, plus the upper half of 64-bit registers
* that are 32-bit on 32-bit implementations.
*/
#define KVM_SREGS_E_64 (1 << 2)
#define KVM_SREGS_E_SPRG8 (1 << 3)
#define KVM_SREGS_E_MCIVPR (1 << 4)
/*
* IVORs are used -- contains IVOR0-15, plus additional IVORs
* in combination with an appropriate feature bit.
*/
#define KVM_SREGS_E_IVOR (1 << 5)
/*
* Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG.
* Also TLBnPS if MMUCFG[MAVN] = 1.
*/
#define KVM_SREGS_E_ARCH206_MMU (1 << 6)
/* DBSR, DBCR, IAC, DAC, DVC */
#define KVM_SREGS_E_DEBUG (1 << 7)
/* Enhanced debug -- DSRR0/1, SPRG9 */
#define KVM_SREGS_E_ED (1 << 8)
/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */
#define KVM_SREGS_E_SPE (1 << 9)
/* External Proxy (EXP) -- EPR */
#define KVM_SREGS_EXP (1 << 10)
/* External PID (E.PD) -- EPSC/EPLC */
#define KVM_SREGS_E_PD (1 << 11)
/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */
#define KVM_SREGS_E_PC (1 << 12)
/* Page table (E.PT) -- EPTCFG */
#define KVM_SREGS_E_PT (1 << 13)
/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */
#define KVM_SREGS_E_PM (1 << 14)
/*
* Special updates:
*
* Some registers may change even while a vcpu is not running.
* To avoid losing these changes, by default these registers are
* not updated by KVM_SET_SREGS. To force an update, set the bit
* in u.e.update_special corresponding to the register to be updated.
*
* The update_special field is zero on return from KVM_GET_SREGS.
*
* When restoring a checkpoint, the caller can set update_special
* to 0xffffffff to ensure that everything is restored, even new features
* that the caller doesn't know about.
*/
#define KVM_SREGS_E_UPDATE_MCSR (1 << 0)
#define KVM_SREGS_E_UPDATE_TSR (1 << 1)
#define KVM_SREGS_E_UPDATE_DEC (1 << 2)
#define KVM_SREGS_E_UPDATE_DBSR (1 << 3)
/*
* In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
* previous KVM_GET_REGS.
*
* Unless otherwise indicated, setting any register with KVM_SET_SREGS
* directly sets its value. It does not trigger any special semantics such
* as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct
* just received from KVM_GET_SREGS is always a no-op.
*/
struct kvm_sregs {
__u32 pvr;
union {
struct {
__u64 sdr1;
struct {
struct {
__u64 slbe;
__u64 slbv;
} slb[64];
} ppc64;
struct {
__u32 sr[16];
__u64 ibat[8];
__u64 dbat[8];
} ppc32;
} s;
struct {
union {
struct { /* KVM_SREGS_E_IMPL_FSL */
__u32 features; /* KVM_SREGS_E_FSL_ */
__u32 svr;
__u64 mcar;
__u32 hid0;
/* KVM_SREGS_E_FSL_PIDn */
__u32 pid1, pid2;
} fsl;
__u8 pad[256];
} impl;
__u32 features; /* KVM_SREGS_E_ */
__u32 impl_id; /* KVM_SREGS_E_IMPL_ */
__u32 update_special; /* KVM_SREGS_E_UPDATE_ */
__u32 pir; /* read-only */
__u64 sprg8;
__u64 sprg9; /* E.ED */
__u64 csrr0;
__u64 dsrr0; /* E.ED */
__u64 mcsrr0;
__u32 csrr1;
__u32 dsrr1; /* E.ED */
__u32 mcsrr1;
__u32 esr;
__u64 dear;
__u64 ivpr;
__u64 mcivpr;
__u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */
__u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */
__u32 tcr;
__u32 decar;
__u32 dec; /* KVM_SREGS_E_UPDATE_DEC */
/*
* Userspace can read TB directly, but the
* value reported here is consistent with "dec".
*
* Read-only.
*/
__u64 tb;
__u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */
__u32 dbcr[3];
/*
* iac/dac registers are 64bit wide, while this API
* interface provides only lower 32 bits on 64 bit
* processors. ONE_REG interface is added for 64bit
* iac/dac registers.
*/
__u32 iac[4];
__u32 dac[2];
__u32 dvc[2];
__u8 num_iac; /* read-only */
__u8 num_dac; /* read-only */
__u8 num_dvc; /* read-only */
__u8 pad;
__u32 epr; /* EXP */
__u32 vrsave; /* a.k.a. USPRG0 */
__u32 epcr; /* KVM_SREGS_E_64 */
__u32 mas0;
__u32 mas1;
__u64 mas2;
__u64 mas7_3;
__u32 mas4;
__u32 mas6;
__u32 ivor_low[16]; /* IVOR0-15 */
__u32 ivor_high[18]; /* IVOR32+, plus room to expand */
__u32 mmucfg; /* read-only */
__u32 eptcfg; /* E.PT, read-only */
__u32 tlbcfg[4];/* read-only */
__u32 tlbps[4]; /* read-only */
__u32 eplc, epsc; /* E.PD */
} e;
__u8 pad[1020];
} u;
};
struct kvm_fpu {
__u64 fpr[32];
};
struct kvm_debug_exit_arch {
};
/* for KVM_SET_GUEST_DEBUG */
struct kvm_guest_debug_arch {
};
/* definition of registers in kvm_run */
struct kvm_sync_regs {
};
#define KVM_INTERRUPT_SET -1U
#define KVM_INTERRUPT_UNSET -2U
#define KVM_INTERRUPT_SET_LEVEL -3U
#define KVM_CPU_440 1
#define KVM_CPU_E500V2 2
#define KVM_CPU_3S_32 3
#define KVM_CPU_3S_64 4
#define KVM_CPU_E500MC 5
/* for KVM_CAP_SPAPR_TCE */
struct kvm_create_spapr_tce {
__u64 liobn;
__u32 window_size;
};
KVM: PPC: Allocate RMAs (Real Mode Areas) at boot for use by guests This adds infrastructure which will be needed to allow book3s_hv KVM to run on older POWER processors, including PPC970, which don't support the Virtual Real Mode Area (VRMA) facility, but only the Real Mode Offset (RMO) facility. These processors require a physically contiguous, aligned area of memory for each guest. When the guest does an access in real mode (MMU off), the address is compared against a limit value, and if it is lower, the address is ORed with an offset value (from the Real Mode Offset Register (RMOR)) and the result becomes the real address for the access. The size of the RMA has to be one of a set of supported values, which usually includes 64MB, 128MB, 256MB and some larger powers of 2. Since we are unlikely to be able to allocate 64MB or more of physically contiguous memory after the kernel has been running for a while, we allocate a pool of RMAs at boot time using the bootmem allocator. The size and number of the RMAs can be set using the kvm_rma_size=xx and kvm_rma_count=xx kernel command line options. KVM exports a new capability, KVM_CAP_PPC_RMA, to signal the availability of the pool of preallocated RMAs. The capability value is 1 if the processor can use an RMA but doesn't require one (because it supports the VRMA facility), or 2 if the processor requires an RMA for each guest. This adds a new ioctl, KVM_ALLOCATE_RMA, which allocates an RMA from the pool and returns a file descriptor which can be used to map the RMA. It also returns the size of the RMA in the argument structure. Having an RMA means we will get multiple KMV_SET_USER_MEMORY_REGION ioctl calls from userspace. To cope with this, we now preallocate the kvm->arch.ram_pginfo array when the VM is created with a size sufficient for up to 64GB of guest memory. Subsequently we will get rid of this array and use memory associated with each memslot instead. This moves most of the code that translates the user addresses into host pfns (page frame numbers) out of kvmppc_prepare_vrma up one level to kvmppc_core_prepare_memory_region. Also, instead of having to look up the VMA for each page in order to check the page size, we now check that the pages we get are compound pages of 16MB. However, if we are adding memory that is mapped to an RMA, we don't bother with calling get_user_pages_fast and instead just offset from the base pfn for the RMA. Typically the RMA gets added after vcpus are created, which makes it inconvenient to have the LPCR (logical partition control register) value in the vcpu->arch struct, since the LPCR controls whether the processor uses RMA or VRMA for the guest. This moves the LPCR value into the kvm->arch struct and arranges for the MER (mediated external request) bit, which is the only bit that varies between vcpus, to be set in assembly code when going into the guest if there is a pending external interrupt request. Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Alexander Graf <agraf@suse.de>
2011-06-29 00:25:44 +00:00
/* for KVM_ALLOCATE_RMA */
struct kvm_allocate_rma {
__u64 rma_size;
};
struct kvm_book3e_206_tlb_entry {
__u32 mas8;
__u32 mas1;
__u64 mas2;
__u64 mas7_3;
};
struct kvm_book3e_206_tlb_params {
/*
* For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
*
* - The number of ways of TLB0 must be a power of two between 2 and
* 16.
* - TLB1 must be fully associative.
* - The size of TLB0 must be a multiple of the number of ways, and
* the number of sets must be a power of two.
* - The size of TLB1 may not exceed 64 entries.
* - TLB0 supports 4 KiB pages.
* - The page sizes supported by TLB1 are as indicated by
* TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1)
* as returned by KVM_GET_SREGS.
* - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[]
* and tlb_ways[] must be zero.
*
* tlb_ways[n] = tlb_sizes[n] means the array is fully associative.
*
* KVM will adjust TLBnCFG based on the sizes configured here,
* though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
* set to zero.
*/
__u32 tlb_sizes[4];
__u32 tlb_ways[4];
__u32 reserved[8];
};
KVM: PPC: Book3S HV: Provide a method for userspace to read and write the HPT A new ioctl, KVM_PPC_GET_HTAB_FD, returns a file descriptor. Reads on this fd return the contents of the HPT (hashed page table), writes create and/or remove entries in the HPT. There is a new capability, KVM_CAP_PPC_HTAB_FD, to indicate the presence of the ioctl. The ioctl takes an argument structure with the index of the first HPT entry to read out and a set of flags. The flags indicate whether the user is intending to read or write the HPT, and whether to return all entries or only the "bolted" entries (those with the bolted bit, 0x10, set in the first doubleword). This is intended for use in implementing qemu's savevm/loadvm and for live migration. Therefore, on reads, the first pass returns information about all HPTEs (or all bolted HPTEs). When the first pass reaches the end of the HPT, it returns from the read. Subsequent reads only return information about HPTEs that have changed since they were last read. A read that finds no changed HPTEs in the HPT following where the last read finished will return 0 bytes. The format of the data provides a simple run-length compression of the invalid entries. Each block of data starts with a header that indicates the index (position in the HPT, which is just an array), the number of valid entries starting at that index (may be zero), and the number of invalid entries following those valid entries. The valid entries, 16 bytes each, follow the header. The invalid entries are not explicitly represented. Signed-off-by: Paul Mackerras <paulus@samba.org> [agraf: fix documentation] Signed-off-by: Alexander Graf <agraf@suse.de>
2012-11-19 22:57:20 +00:00
/* For KVM_PPC_GET_HTAB_FD */
struct kvm_get_htab_fd {
__u64 flags;
__u64 start_index;
__u64 reserved[2];
};
/* Values for kvm_get_htab_fd.flags */
#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
#define KVM_GET_HTAB_WRITE ((__u64)0x2)
/*
* Data read on the file descriptor is formatted as a series of
* records, each consisting of a header followed by a series of
* `n_valid' HPTEs (16 bytes each), which are all valid. Following
* those valid HPTEs there are `n_invalid' invalid HPTEs, which
* are not represented explicitly in the stream. The same format
* is used for writing.
*/
struct kvm_get_htab_header {
__u32 index;
__u16 n_valid;
__u16 n_invalid;
};
#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
#define KVM_REG_PPC_IAC3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4)
#define KVM_REG_PPC_IAC4 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5)
#define KVM_REG_PPC_DAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6)
#define KVM_REG_PPC_DAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7)
#define KVM_REG_PPC_DABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8)
#define KVM_REG_PPC_DSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9)
#define KVM_REG_PPC_PURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa)
#define KVM_REG_PPC_SPURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb)
#define KVM_REG_PPC_DAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc)
#define KVM_REG_PPC_DSISR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd)
#define KVM_REG_PPC_AMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xe)
#define KVM_REG_PPC_UAMOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xf)
#define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10)
#define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11)
#define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12)
#define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18)
#define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19)
#define KVM_REG_PPC_PMC3 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1a)
#define KVM_REG_PPC_PMC4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1b)
#define KVM_REG_PPC_PMC5 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1c)
#define KVM_REG_PPC_PMC6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1d)
#define KVM_REG_PPC_PMC7 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e)
#define KVM_REG_PPC_PMC8 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f)
/* 32 floating-point registers */
#define KVM_REG_PPC_FPR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x20)
#define KVM_REG_PPC_FPR(n) (KVM_REG_PPC_FPR0 + (n))
#define KVM_REG_PPC_FPR31 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3f)
/* 32 VMX/Altivec vector registers */
#define KVM_REG_PPC_VR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x40)
#define KVM_REG_PPC_VR(n) (KVM_REG_PPC_VR0 + (n))
#define KVM_REG_PPC_VR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x5f)
/* 32 double-width FP registers for VSX */
/* High-order halves overlap with FP regs */
#define KVM_REG_PPC_VSR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x60)
#define KVM_REG_PPC_VSR(n) (KVM_REG_PPC_VSR0 + (n))
#define KVM_REG_PPC_VSR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x7f)
/* FP and vector status/control registers */
#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
/* Virtual processor areas */
/* For SLB & DTL, address in high (first) half, length in low half */
#define KVM_REG_PPC_VPA_ADDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x82)
#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
#endif /* __LINUX_KVM_POWERPC_H */