Merge tag 'kvm-x86-svm-6.19' of https://github.com/kvm-x86/linux into HEAD
KVM SVM changes for 6.19:

 - Fix a few missing "VMCB dirty" bugs.

 - Fix the worst of KVM's lack of EFER.LMSLE emulation.

 - Add AVIC support for addressing 4k vCPUs in x2AVIC mode.

 - Fix incorrect handling of selective CR0 writes when checking intercepts
   during emulation of L2 instructions.

 - Fix a currently-benign bug where KVM would clobber SPEC_CTRL[63:32] on
   VMRUN and #VMEXIT.

 - Fix a bug where KVM would corrupt the guest code stream when re-injecting
   a soft interrupt if the guest patched the underlying code after the
   VM-Exit, e.g. when Linux patches code with a temporary INT3.

 - Add KVM_X86_SNP_POLICY_BITS to advertise supported SNP policy bits to
   userspace, and extend KVM "support" to all policy bits that don't require
   any actual support from KVM.
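The new attribute is userspace-visible, so a minimal usage sketch may help; it is not part of this series. It assumes kernel headers that already export KVM_X86_GRP_SEV and KVM_X86_SNP_POLICY_BITS, and that the attribute is exposed on the system fd (/dev/kvm), as the uapi comment in the diff below indicates; error handling is reduced to the basics.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	uint64_t policy_bits = 0;
	struct kvm_device_attr attr = {
		.group = KVM_X86_GRP_SEV,
		.attr  = KVM_X86_SNP_POLICY_BITS,
		.addr  = (uint64_t)(uintptr_t)&policy_bits,
	};
	/* The SEV group is a system-scoped attribute, i.e. it lives on /dev/kvm. */
	int kvm_fd = open("/dev/kvm", O_RDWR | O_CLOEXEC);

	if (kvm_fd < 0)
		return 1;

	if (ioctl(kvm_fd, KVM_HAS_DEVICE_ATTR, &attr) ||
	    ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr))
		return 1;	/* attribute not supported by this kernel */

	printf("KVM-supported SNP policy bits: 0x%llx\n",
	       (unsigned long long)policy_bits);
	return 0;
}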
@@ -48,7 +48,14 @@ versus "has_error_code", i.e. KVM's ABI follows AMD behavior.

Nested virtualization features
------------------------------

TBD

On AMD CPUs, when GIF is cleared, #DB exceptions or traps due to a breakpoint
register match are ignored and discarded by the CPU. The CPU relies on the VMM
to fully virtualize this behavior, even when vGIF is enabled for the guest
(i.e. vGIF=0 does not cause the CPU to drop #DBs when the guest is running).
KVM does not virtualize this behavior as the complexity is unjustified given
the rarity of the use case. One way to handle this would be for KVM to
intercept the #DB, temporarily disable the breakpoint, single-step over the
instruction, then re-enable the breakpoint.

x2APIC
------
@@ -338,6 +338,7 @@
#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/
#define X86_FEATURE_EFER_LMSLE_MBZ (13*32+20) /* EFER.LMSLE must be zero */
#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
@@ -504,6 +505,7 @@
 * can access host MMIO (ignored for all intents
 * and purposes if CLEAR_CPU_BUF_VM is set).
 */
#define X86_FEATURE_X2AVIC_EXT (21*32+18) /* AMD SVM x2AVIC support for 4k vCPUs */

/*
 * BUG word(s)
@@ -2139,6 +2139,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
 * the gfn, i.e. retrying the instruction will hit a
 * !PRESENT fault, which results in a new shadow page
 * and sends KVM back to square one.
 *
 * EMULTYPE_SKIP_SOFT_INT - Set in combination with EMULTYPE_SKIP to only skip
 * an instruction if it could generate a given software
 * interrupt, which must be encoded via
 * EMULTYPE_SET_SOFT_INT_VECTOR().
 */
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
@@ -2149,6 +2154,10 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
#define EMULTYPE_PF (1 << 6)
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
#define EMULTYPE_SKIP_SOFT_INT (1 << 9)

#define EMULTYPE_SET_SOFT_INT_VECTOR(v) ((u32)((v) & 0xff) << 16)
#define EMULTYPE_GET_SOFT_INT_VECTOR(e) (((e) >> 16) & 0xff)
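A tiny standalone sketch (not kernel code) of how the vector encoding above is meant to round-trip. The three macro bodies are copied from this hunk; the uint32_t type stands in for the kernel's u32, and the sample vector is arbitrary.

#include <assert.h>
#include <stdint.h>

#define EMULTYPE_SKIP_SOFT_INT		(1 << 9)
#define EMULTYPE_SET_SOFT_INT_VECTOR(v)	((uint32_t)((v) & 0xff) << 16)
#define EMULTYPE_GET_SOFT_INT_VECTOR(e)	(((e) >> 16) & 0xff)

int main(void)
{
	uint8_t vector = 0x80;	/* e.g. a guest INT 0x80 */
	int emul_type = EMULTYPE_SKIP_SOFT_INT |
			EMULTYPE_SET_SOFT_INT_VECTOR(vector);

	/* The emulator recovers the vector when deciding whether to skip. */
	assert(EMULTYPE_GET_SOFT_INT_VECTOR(emul_type) == vector);
	return 0;
}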
static inline bool kvm_can_emulate_event_vectoring(int emul_type)
{
@@ -279,7 +279,7 @@ enum avic_ipi_failure_cause {
AVIC_IPI_FAILURE_INVALID_IPI_VECTOR,
};

#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(8, 0)
#define AVIC_PHYSICAL_MAX_INDEX_MASK GENMASK_ULL(11, 0)

/*
 * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as
@@ -289,11 +289,14 @@ enum avic_ipi_failure_cause {

/*
 * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511).
 * With X86_FEATURE_X2AVIC_EXT, the max index is increased to 0xfff (4095).
 */
#define X2AVIC_MAX_PHYSICAL_ID 0x1FFUL
#define X2AVIC_4K_MAX_PHYSICAL_ID 0xFFFUL

static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
static_assert((X2AVIC_4K_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_4K_MAX_PHYSICAL_ID);

#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0)
#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
@@ -502,6 +502,7 @@ struct kvm_sync_regs {
/* vendor-specific groups and attributes for system fd */
#define KVM_X86_GRP_SEV 1
# define KVM_X86_SEV_VMSA_FEATURES 0
# define KVM_X86_SNP_POLICY_BITS 1

struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
{ X86_FEATURE_AMD_FAST_CPPC, CPUID_EDX, 15, 0x80000007, 0 },
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_X2AVIC_EXT, CPUID_ECX, 6, 0x8000000a, 0 },
{ X86_FEATURE_COHERENCY_SFW_NO, CPUID_EBX, 31, 0x8000001f, 0 },
{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },

@@ -1135,6 +1135,7 @@ void kvm_set_cpu_caps(void)
F(AMD_STIBP),
F(AMD_STIBP_ALWAYS_ON),
F(AMD_IBRS_SAME_MODE),
PASSTHROUGH_F(EFER_LMSLE_MBZ),
F(AMD_PSFD),
F(AMD_IBPB_RET),
);
@@ -106,7 +106,7 @@ static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
static bool x2avic_enabled;

static u32 x2avic_max_physical_id;

static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
bool intercept)
@@ -158,12 +158,40 @@ static void avic_set_x2apic_msr_interception(struct vcpu_svm *svm,
svm->x2avic_msrs_intercepted = intercept;
}

static u32 __avic_get_max_physical_id(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
u32 arch_max;

/*
 * Return the largest size (x2APIC) when querying without a vCPU, e.g.
 * to allocate the per-VM table.
 */
if (x2avic_enabled && (!vcpu || apic_x2apic_mode(vcpu->arch.apic)))
arch_max = x2avic_max_physical_id;
else
arch_max = AVIC_MAX_PHYSICAL_ID;

/*
 * Despite its name, KVM_CAP_MAX_VCPU_ID represents the maximum APIC ID
 * plus one, so the max possible APIC ID is one less than that.
 */
return min(kvm->arch.max_vcpu_ids - 1, arch_max);
}
static u32 avic_get_max_physical_id(struct kvm_vcpu *vcpu)
{
return __avic_get_max_physical_id(vcpu->kvm, vcpu);
}

static void avic_activate_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
struct kvm_vcpu *vcpu = &svm->vcpu;

vmcb->control.int_ctl &= ~(AVIC_ENABLE_MASK | X2APIC_MODE_MASK);

vmcb->control.avic_physical_id &= ~AVIC_PHYSICAL_MAX_INDEX_MASK;
vmcb->control.avic_physical_id |= avic_get_max_physical_id(vcpu);

vmcb->control.int_ctl |= AVIC_ENABLE_MASK;

@@ -176,7 +204,7 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
 */
if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
vmcb->control.int_ctl |= X2APIC_MODE_MASK;
vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;

/* Disabling MSR intercept for x2APIC registers */
avic_set_x2apic_msr_interception(svm, false);
} else {
@@ -186,8 +214,6 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
 */
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);

/* For xAVIC and hybrid-xAVIC modes */
vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
/* Enabling MSR intercept for x2APIC registers */
avic_set_x2apic_msr_interception(svm, true);
}
@@ -247,6 +273,30 @@ static int avic_ga_log_notifier(u32 ga_tag)
return 0;
}

static int avic_get_physical_id_table_order(struct kvm *kvm)
{
/* Provision for the maximum physical ID supported in x2avic mode */
return get_order((__avic_get_max_physical_id(kvm, NULL) + 1) * sizeof(u64));
}
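As an aside, a standalone sketch of the sizing math above, assuming 4 KiB pages: with the 4k-vCPU extension the physical ID table grows to an order-3 (eight page) allocation, which is why the table is now freed with free_pages() and the matching order further down.

#include <stdio.h>

int main(void)
{
	unsigned long max_id = 0xFFF;	/* X2AVIC_4K_MAX_PHYSICAL_ID */
	unsigned long bytes = (max_id + 1) * sizeof(unsigned long long);
	unsigned long pages = (bytes + 4095) / 4096;	/* assumes 4 KiB pages */
	int order = 0;

	while ((1UL << order) < pages)
		order++;

	/* Prints: 32768 bytes -> 8 pages -> order 3 */
	printf("%lu bytes -> %lu pages -> order %d\n", bytes, pages, order);
	return 0;
}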
int avic_alloc_physical_id_table(struct kvm *kvm)
{
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);

if (!irqchip_in_kernel(kvm) || !enable_apicv)
return 0;

if (kvm_svm->avic_physical_id_table)
return 0;

kvm_svm->avic_physical_id_table = (void *)__get_free_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
avic_get_physical_id_table_order(kvm));
if (!kvm_svm->avic_physical_id_table)
return -ENOMEM;

return 0;
}

void avic_vm_destroy(struct kvm *kvm)
{
unsigned long flags;
@@ -256,7 +306,8 @@ void avic_vm_destroy(struct kvm *kvm)
return;

free_page((unsigned long)kvm_svm->avic_logical_id_table);
free_page((unsigned long)kvm_svm->avic_physical_id_table);
free_pages((unsigned long)kvm_svm->avic_physical_id_table,
avic_get_physical_id_table_order(kvm));

spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
hash_del(&kvm_svm->hnode);
@@ -274,10 +325,6 @@ int avic_vm_init(struct kvm *kvm)
if (!enable_apicv)
return 0;

kvm_svm->avic_physical_id_table = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!kvm_svm->avic_physical_id_table)
goto free_avic;

kvm_svm->avic_logical_id_table = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!kvm_svm->avic_logical_id_table)
goto free_avic;
@@ -342,7 +389,7 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 * fully initialized AVIC.
 */
if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
(id > X2AVIC_MAX_PHYSICAL_ID)) {
(id > x2avic_max_physical_id)) {
kvm_set_apicv_inhibit(vcpu->kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG);
vcpu->arch.apic->apicv_active = false;
return 0;
@@ -562,7 +609,7 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
u32 icrl = svm->vmcb->control.exit_info_1;
u32 id = svm->vmcb->control.exit_info_2 >> 32;
u32 index = svm->vmcb->control.exit_info_2 & 0x1FF;
u32 index = svm->vmcb->control.exit_info_2 & AVIC_PHYSICAL_MAX_INDEX_MASK;
struct kvm_lapic *apic = vcpu->arch.apic;

trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
@@ -962,7 +1009,8 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu,
if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
return;

if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= PAGE_SIZE))
if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >=
PAGE_SIZE << avic_get_physical_id_table_order(vcpu->kvm)))
return;

/*
@@ -1024,7 +1072,8 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action)

lockdep_assert_preemption_disabled();

if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >= PAGE_SIZE))
if (WARN_ON_ONCE(vcpu->vcpu_id * sizeof(entry) >=
PAGE_SIZE << avic_get_physical_id_table_order(vcpu->kvm)))
return;

/*
@@ -1226,10 +1275,15 @@ bool __init avic_hardware_setup(void)

/* AVIC is a prerequisite for x2AVIC. */
x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
if (x2avic_enabled)
pr_info("x2AVIC enabled\n");
else
if (x2avic_enabled) {
if (cpu_feature_enabled(X86_FEATURE_X2AVIC_EXT))
x2avic_max_physical_id = X2AVIC_4K_MAX_PHYSICAL_ID;
else
x2avic_max_physical_id = X2AVIC_MAX_PHYSICAL_ID;
pr_info("x2AVIC enabled (max %u vCPUs)\n", x2avic_max_physical_id + 1);
} else {
svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true;
}

/*
 * Disable IPI virtualization for AMD Family 17h CPUs (Zen1 and Zen2)
@@ -613,6 +613,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
struct kvm_vcpu *vcpu = &svm->vcpu;

nested_vmcb02_compute_g_pat(svm);
vmcb_mark_dirty(vmcb02, VMCB_NPT);

/* Load the nested guest state */
if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
@@ -751,6 +752,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
vmcb_mark_dirty(vmcb02, VMCB_PERM_MAP);

/*
 * Stash vmcb02's counter if the guest hasn't moved past the guilty
@@ -1430,16 +1432,6 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
case SVM_EXIT_IOIO:
vmexit = nested_svm_intercept_ioio(svm);
break;
case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
vmexit = NESTED_EXIT_DONE;
break;
}
case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
/*
 * Host-intercepted exceptions have been checked already in
@@ -65,20 +65,24 @@ module_param_named(ciphertext_hiding_asids, nr_ciphertext_hiding_asids, uint, 04
#define AP_RESET_HOLD_NAE_EVENT 1
#define AP_RESET_HOLD_MSR_PROTO 2

/* As defined by SEV-SNP Firmware ABI, under "Guest Policy". */
#define SNP_POLICY_MASK_API_MINOR GENMASK_ULL(7, 0)
#define SNP_POLICY_MASK_API_MAJOR GENMASK_ULL(15, 8)
#define SNP_POLICY_MASK_SMT BIT_ULL(16)
#define SNP_POLICY_MASK_RSVD_MBO BIT_ULL(17)
#define SNP_POLICY_MASK_DEBUG BIT_ULL(19)
#define SNP_POLICY_MASK_SINGLE_SOCKET BIT_ULL(20)
/*
 * SEV-SNP policy bits that can be supported by KVM. These include policy bits
 * that have implementation support within KVM or policy bits that do not
 * require implementation support within KVM to enforce the policy.
 */
#define KVM_SNP_POLICY_MASK_VALID (SNP_POLICY_MASK_API_MINOR | \
SNP_POLICY_MASK_API_MAJOR | \
SNP_POLICY_MASK_SMT | \
SNP_POLICY_MASK_RSVD_MBO | \
SNP_POLICY_MASK_DEBUG | \
SNP_POLICY_MASK_SINGLE_SOCKET | \
SNP_POLICY_MASK_CXL_ALLOW | \
SNP_POLICY_MASK_MEM_AES_256_XTS | \
SNP_POLICY_MASK_RAPL_DIS | \
SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM | \
SNP_POLICY_MASK_PAGE_SWAP_DISABLE)

#define SNP_POLICY_MASK_VALID (SNP_POLICY_MASK_API_MINOR | \
SNP_POLICY_MASK_API_MAJOR | \
SNP_POLICY_MASK_SMT | \
SNP_POLICY_MASK_RSVD_MBO | \
SNP_POLICY_MASK_DEBUG | \
SNP_POLICY_MASK_SINGLE_SOCKET)
static u64 snp_supported_policy_bits __ro_after_init;

#define INITIAL_VMSA_GPA 0xFFFFFFFFF000
@@ -2143,6 +2147,10 @@ int sev_dev_get_attr(u32 group, u64 attr, u64 *val)
*val = sev_supported_vmsa_features;
return 0;

case KVM_X86_SNP_POLICY_BITS:
*val = snp_supported_policy_bits;
return 0;

default:
return -ENXIO;
}
@@ -2207,7 +2215,7 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (params.flags)
return -EINVAL;

if (params.policy & ~SNP_POLICY_MASK_VALID)
if (params.policy & ~snp_supported_policy_bits)
return -EINVAL;

/* Check for policy bits that must be set */
@@ -3100,8 +3108,11 @@ out:
else if (sev_snp_supported)
sev_snp_supported = is_sev_snp_initialized();

if (sev_snp_supported)
if (sev_snp_supported) {
snp_supported_policy_bits = sev_get_snp_policy_bits() &
KVM_SNP_POLICY_MASK_VALID;
nr_ciphertext_hiding_asids = init_args.max_snp_asid;
}

/*
 * If ciphertext hiding is enabled, the joint SEV-ES/SEV-SNP
@@ -5085,10 +5096,10 @@ struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)

/* Check if the SEV policy allows debugging */
if (sev_snp_guest(vcpu->kvm)) {
if (!(sev->policy & SNP_POLICY_DEBUG))
if (!(sev->policy & SNP_POLICY_MASK_DEBUG))
return NULL;
} else {
if (sev->policy & SEV_POLICY_NODBG)
if (sev->policy & SEV_POLICY_MASK_NODBG)
return NULL;
}
@@ -272,6 +272,7 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
}

static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
int emul_type,
bool commit_side_effects)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -293,7 +294,7 @@ static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
if (unlikely(!commit_side_effects))
old_rflags = svm->vmcb->save.rflags;

if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
if (!kvm_emulate_instruction(vcpu, emul_type))
return 0;

if (unlikely(!commit_side_effects))
@@ -311,11 +312,13 @@ done:

static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
return __svm_skip_emulated_instruction(vcpu, true);
return __svm_skip_emulated_instruction(vcpu, EMULTYPE_SKIP, true);
}

static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu)
static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu, u8 vector)
{
const int emul_type = EMULTYPE_SKIP | EMULTYPE_SKIP_SOFT_INT |
EMULTYPE_SET_SOFT_INT_VECTOR(vector);
unsigned long rip, old_rip = kvm_rip_read(vcpu);
struct vcpu_svm *svm = to_svm(vcpu);
@@ -331,7 +334,7 @@ static int svm_update_soft_interrupt_rip(struct kvm_vcpu *vcpu)
 * in use, the skip must not commit any side effects such as clearing
 * the interrupt shadow or RFLAGS.RF.
 */
if (!__svm_skip_emulated_instruction(vcpu, !nrips))
if (!__svm_skip_emulated_instruction(vcpu, emul_type, !nrips))
return -EIO;

rip = kvm_rip_read(vcpu);
@@ -367,7 +370,7 @@ static void svm_inject_exception(struct kvm_vcpu *vcpu)
kvm_deliver_exception_payload(vcpu, ex);

if (kvm_exception_is_soft(ex->vector) &&
svm_update_soft_interrupt_rip(vcpu))
svm_update_soft_interrupt_rip(vcpu, ex->vector))
return;

svm->vmcb->control.event_inj = ex->vector
@@ -1198,6 +1201,11 @@ void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
svm->vmcb = target_vmcb->ptr;
}

static int svm_vcpu_precreate(struct kvm *kvm)
{
return avic_alloc_physical_id_table(kvm);
}

static int svm_vcpu_create(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm;
@@ -3628,11 +3636,12 @@ static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)

static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
struct kvm_queued_interrupt *intr = &vcpu->arch.interrupt;
struct vcpu_svm *svm = to_svm(vcpu);
u32 type;

if (vcpu->arch.interrupt.soft) {
if (svm_update_soft_interrupt_rip(vcpu))
if (intr->soft) {
if (svm_update_soft_interrupt_rip(vcpu, intr->nr))
return;

type = SVM_EVTINJ_TYPE_SOFT;
@@ -3640,12 +3649,10 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
type = SVM_EVTINJ_TYPE_INTR;
}

trace_kvm_inj_virq(vcpu->arch.interrupt.nr,
vcpu->arch.interrupt.soft, reinjected);
trace_kvm_inj_virq(intr->nr, intr->soft, reinjected);
++vcpu->stat.irq_injections;

svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
SVM_EVTINJ_VALID | type;
svm->vmcb->control.event_inj = intr->nr | SVM_EVTINJ_VALID | type;
}

void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
@@ -4511,31 +4518,45 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
case SVM_EXIT_WRITE_CR0: {
unsigned long cr0, val;

if (info->intercept == x86_intercept_cr_write)
/*
 * Adjust the exit code accordingly if a CR other than CR0 is
 * being written, and skip straight to the common handling as
 * only CR0 has an additional selective intercept.
 */
if (info->intercept == x86_intercept_cr_write && info->modrm_reg) {
icpt_info.exit_code += info->modrm_reg;

if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0 ||
info->intercept == x86_intercept_clts)
break;

if (!(vmcb12_is_intercept(&svm->nested.ctl,
INTERCEPT_SELECTIVE_CR0)))
break;

cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;

if (info->intercept == x86_intercept_lmsw) {
cr0 &= 0xfUL;
val &= 0xfUL;
/* lmsw can't clear PE - catch this here */
if (cr0 & X86_CR0_PE)
val |= X86_CR0_PE;
}

/*
 * Convert the exit_code to SVM_EXIT_CR0_SEL_WRITE if a
 * selective CR0 intercept is triggered (the common logic will
 * treat the selective intercept as being enabled). Note, the
 * unconditional intercept has higher priority, i.e. this is
 * only relevant if *only* the selective intercept is enabled.
 */
if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_CR0_WRITE) ||
!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SELECTIVE_CR0)))
break;

/* CLTS never triggers INTERCEPT_SELECTIVE_CR0 */
if (info->intercept == x86_intercept_clts)
break;

/* LMSW always triggers INTERCEPT_SELECTIVE_CR0 */
if (info->intercept == x86_intercept_lmsw) {
icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
break;
}

/*
 * MOV-to-CR0 only triggers INTERCEPT_SELECTIVE_CR0 if any bit
 * other than SVM_CR0_SELECTIVE_MASK is changed.
 */
cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
val = info->src_val & ~SVM_CR0_SELECTIVE_MASK;
if (cr0 ^ val)
icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;

break;
}
case SVM_EXIT_READ_DR0:
@@ -5005,6 +5026,7 @@ struct kvm_x86_ops svm_x86_ops __initdata = {
.emergency_disable_virtualization_cpu = svm_emergency_disable_virtualization_cpu,
.has_emulated_msr = svm_has_emulated_msr,

.vcpu_precreate = svm_vcpu_precreate,
.vcpu_create = svm_vcpu_create,
.vcpu_free = svm_vcpu_free,
.vcpu_reset = svm_vcpu_reset,
@@ -5309,7 +5331,9 @@ static __init int svm_hardware_setup(void)

if (nested) {
pr_info("Nested Virtualization enabled\n");
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
kvm_enable_efer_bits(EFER_SVME);
if (!boot_cpu_has(X86_FEATURE_EFER_LMSLE_MBZ))
kvm_enable_efer_bits(EFER_LMSLE);

r = nested_svm_init_msrpm_merge_offsets();
if (r)
@@ -117,9 +117,6 @@ struct kvm_sev_info {
cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
};

#define SEV_POLICY_NODBG BIT_ULL(0)
#define SNP_POLICY_DEBUG BIT_ULL(19)

struct kvm_svm {
struct kvm kvm;

@@ -807,6 +804,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops;

bool __init avic_hardware_setup(void);
void avic_hardware_unsetup(void);
int avic_alloc_physical_id_table(struct kvm *kvm);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
@@ -52,11 +52,23 @@
 * there must not be any returns or indirect branches between this code
 * and vmentry.
 */
movl SVM_spec_ctrl(%_ASM_DI), %eax
cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
#ifdef CONFIG_X86_64
mov SVM_spec_ctrl(%rdi), %rdx
cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx
je 801b
movl %edx, %eax
shr $32, %rdx
#else
mov SVM_spec_ctrl(%edi), %eax
mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx
xor %eax, %ecx
mov SVM_spec_ctrl + 4(%edi), %edx
mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %esi
xor %edx, %esi
or %esi, %ecx
je 801b
#endif
mov $MSR_IA32_SPEC_CTRL, %ecx
xor %edx, %edx
wrmsr
jmp 801b
.endm
@@ -81,13 +93,25 @@
jnz 998f
rdmsr
movl %eax, SVM_spec_ctrl(%_ASM_DI)
movl %edx, SVM_spec_ctrl + 4(%_ASM_DI)
998:

/* Now restore the host value of the MSR if different from the guest's. */
movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
cmp SVM_spec_ctrl(%_ASM_DI), %eax
#ifdef CONFIG_X86_64
mov PER_CPU_VAR(x86_spec_ctrl_current), %rdx
cmp SVM_spec_ctrl(%rdi), %rdx
je 901b
xor %edx, %edx
movl %edx, %eax
shr $32, %rdx
#else
mov PER_CPU_VAR(x86_spec_ctrl_current), %eax
mov SVM_spec_ctrl(%edi), %esi
xor %eax, %esi
mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edx
mov SVM_spec_ctrl + 4(%edi), %edi
xor %edx, %edi
or %edi, %esi
je 901b
#endif
wrmsr
jmp 901b
.endm
@@ -136,7 +160,7 @@ SYM_FUNC_START(__svm_vcpu_run)
mov %_ASM_ARG1, %_ASM_DI
.endif

/* Clobbers RAX, RCX, RDX. */
/* Clobbers RAX, RCX, RDX (and ESI on 32-bit), consumes RDI (@svm). */
RESTORE_GUEST_SPEC_CTRL

/*
@@ -213,7 +237,10 @@ SYM_FUNC_START(__svm_vcpu_run)
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT

/* Clobbers RAX, RCX, RDX. */
/*
 * Clobbers RAX, RCX, RDX (and ESI, EDI on 32-bit), consumes RDI (@svm)
 * and RSP (pointer to @spec_ctrl_intercepted).
 */
RESTORE_HOST_SPEC_CTRL

/*
@@ -333,7 +360,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
mov %rdi, SEV_ES_RDI (%rdx)
mov %rsi, SEV_ES_RSI (%rdx)

/* Clobbers RAX, RCX, RDX (@hostsa). */
/* Clobbers RAX, RCX, and RDX (@hostsa), consumes RDI (@svm). */
RESTORE_GUEST_SPEC_CTRL

/* Get svm->current_vmcb->pa into RAX. */
@@ -9332,6 +9332,23 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
return false;
}

static bool is_soft_int_instruction(struct x86_emulate_ctxt *ctxt,
int emulation_type)
{
u8 vector = EMULTYPE_GET_SOFT_INT_VECTOR(emulation_type);

switch (ctxt->b) {
case 0xcc:
return vector == BP_VECTOR;
case 0xcd:
return vector == ctxt->src.val;
case 0xce:
return vector == OF_VECTOR;
default:
return false;
}
}

/*
 * Decode an instruction for emulation. The caller is responsible for handling
 * code breakpoints. Note, manually detecting code breakpoints is unnecessary
@@ -9442,6 +9459,10 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 * injecting single-step #DBs.
 */
if (emulation_type & EMULTYPE_SKIP) {
if (emulation_type & EMULTYPE_SKIP_SOFT_INT &&
!is_soft_int_instruction(ctxt, emulation_type))
return 0;

if (ctxt->mode != X86EMUL_MODE_PROT64)
ctxt->eip = (u32)ctxt->_eip;
else
@@ -2777,6 +2777,43 @@ void sev_platform_shutdown(void)
}
EXPORT_SYMBOL_GPL(sev_platform_shutdown);

u64 sev_get_snp_policy_bits(void)
{
struct psp_device *psp = psp_master;
struct sev_device *sev;
u64 policy_bits;

if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP))
return 0;

if (!psp || !psp->sev_data)
return 0;

sev = psp->sev_data;

policy_bits = SNP_POLICY_MASK_BASE;

if (sev->snp_plat_status.feature_info) {
if (sev->snp_feat_info_0.ecx & SNP_RAPL_DISABLE_SUPPORTED)
policy_bits |= SNP_POLICY_MASK_RAPL_DIS;

if (sev->snp_feat_info_0.ecx & SNP_CIPHER_TEXT_HIDING_SUPPORTED)
policy_bits |= SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM;

if (sev->snp_feat_info_0.ecx & SNP_AES_256_XTS_POLICY_SUPPORTED)
policy_bits |= SNP_POLICY_MASK_MEM_AES_256_XTS;

if (sev->snp_feat_info_0.ecx & SNP_CXL_ALLOW_POLICY_SUPPORTED)
policy_bits |= SNP_POLICY_MASK_CXL_ALLOW;

if (sev_version_greater_or_equal(1, 58))
policy_bits |= SNP_POLICY_MASK_PAGE_SWAP_DISABLE;
}

return policy_bits;
}
EXPORT_SYMBOL_GPL(sev_get_snp_policy_bits);

void sev_dev_destroy(struct psp_device *psp)
{
struct sev_device *sev = psp->sev_data;
@@ -14,6 +14,39 @@

#include <uapi/linux/psp-sev.h>

/* As defined by SEV API, under "Guest Policy". */
#define SEV_POLICY_MASK_NODBG BIT(0)
#define SEV_POLICY_MASK_NOKS BIT(1)
#define SEV_POLICY_MASK_ES BIT(2)
#define SEV_POLICY_MASK_NOSEND BIT(3)
#define SEV_POLICY_MASK_DOMAIN BIT(4)
#define SEV_POLICY_MASK_SEV BIT(5)
#define SEV_POLICY_MASK_API_MAJOR GENMASK(23, 16)
#define SEV_POLICY_MASK_API_MINOR GENMASK(31, 24)

/* As defined by SEV-SNP Firmware ABI, under "Guest Policy". */
#define SNP_POLICY_MASK_API_MINOR GENMASK_ULL(7, 0)
#define SNP_POLICY_MASK_API_MAJOR GENMASK_ULL(15, 8)
#define SNP_POLICY_MASK_SMT BIT_ULL(16)
#define SNP_POLICY_MASK_RSVD_MBO BIT_ULL(17)
#define SNP_POLICY_MASK_MIGRATE_MA BIT_ULL(18)
#define SNP_POLICY_MASK_DEBUG BIT_ULL(19)
#define SNP_POLICY_MASK_SINGLE_SOCKET BIT_ULL(20)
#define SNP_POLICY_MASK_CXL_ALLOW BIT_ULL(21)
#define SNP_POLICY_MASK_MEM_AES_256_XTS BIT_ULL(22)
#define SNP_POLICY_MASK_RAPL_DIS BIT_ULL(23)
#define SNP_POLICY_MASK_CIPHERTEXT_HIDING_DRAM BIT_ULL(24)
#define SNP_POLICY_MASK_PAGE_SWAP_DISABLE BIT_ULL(25)

/* Base SEV-SNP policy bitmask for minimum supported SEV firmware version */
#define SNP_POLICY_MASK_BASE (SNP_POLICY_MASK_API_MINOR | \
SNP_POLICY_MASK_API_MAJOR | \
SNP_POLICY_MASK_SMT | \
SNP_POLICY_MASK_RSVD_MBO | \
SNP_POLICY_MASK_MIGRATE_MA | \
SNP_POLICY_MASK_DEBUG | \
SNP_POLICY_MASK_SINGLE_SOCKET)
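For illustration, a hedged sketch of how a VMM might compose a guest policy value from the masks above before passing it to SNP launch; the chosen bits (RSVD_MBO, which the firmware requires to be set, plus SMT and an example API 1.51 floor) and the standalone BIT_ULL definition are assumptions, not part of this diff.

#include <stdint.h>
#include <stdio.h>

#define BIT_ULL(n)			(1ULL << (n))
#define SNP_POLICY_MASK_SMT		BIT_ULL(16)
#define SNP_POLICY_MASK_RSVD_MBO	BIT_ULL(17)	/* reserved, must be one */

int main(void)
{
	uint64_t api_major = 1, api_minor = 51;	/* example minimum firmware ABI */
	uint64_t policy = SNP_POLICY_MASK_RSVD_MBO |
			  SNP_POLICY_MASK_SMT |
			  (api_major << 8) | api_minor;

	/* A VMM would place this in the launch-start policy field and can
	 * pre-validate it against the KVM_X86_SNP_POLICY_BITS attribute. */
	printf("policy = 0x%llx\n", (unsigned long long)policy);
	return 0;
}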
#define SEV_FW_BLOB_MAX_SIZE 0x4000 /* 16KB */

/**
@@ -849,7 +882,10 @@ struct snp_feature_info {
u32 edx;
} __packed;

#define SNP_RAPL_DISABLE_SUPPORTED BIT(2)
#define SNP_CIPHER_TEXT_HIDING_SUPPORTED BIT(3)
#define SNP_AES_256_XTS_POLICY_SUPPORTED BIT(4)
#define SNP_CXL_ALLOW_POLICY_SUPPORTED BIT(5)

#ifdef CONFIG_CRYPTO_DEV_SP_PSP

@@ -995,6 +1031,7 @@ void *snp_alloc_firmware_page(gfp_t mask);
void snp_free_firmware_page(void *addr);
void sev_platform_shutdown(void);
bool sev_is_snp_ciphertext_hiding_supported(void);
u64 sev_get_snp_policy_bits(void);

#else /* !CONFIG_CRYPTO_DEV_SP_PSP */