From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/vmx: Revert "VMX: use a single, global APIC access page"
The claim "No accesses would ever go to this page." is false. A consequence
of how Intel's APIC Acceleration works, combined with Xen's choice to have
per-domain P2Ms (rather than per-vCPU P2Ms), is that the APIC page is fully
read-write to any vCPU which is not in xAPIC mode.
This reverts commit 58850b9074d3e7affdf3bc94c84e417ecfa4d165.
This is XSA-412 / CVE-2022-42327.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index d429d76c18c9..3f4276531322 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -66,7 +66,8 @@ boolean_param("force-ept", opt_force_ept);
static void vmx_ctxt_switch_from(struct vcpu *v);
static void vmx_ctxt_switch_to(struct vcpu *v);
-static int alloc_vlapic_mapping(void);
+static int vmx_alloc_vlapic_mapping(struct domain *d);
+static void vmx_free_vlapic_mapping(struct domain *d);
static void vmx_install_vlapic_mapping(struct vcpu *v);
static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr,
unsigned int flags);
@@ -77,8 +78,6 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content);
static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content);
static void vmx_invlpg(struct vcpu *v, unsigned long linear);
-static mfn_t __read_mostly apic_access_mfn = INVALID_MFN_INITIALIZER;
-
/* Values for domain's ->arch.hvm_domain.pi_ops.flags. */
#define PI_CSW_FROM (1u << 0)
#define PI_CSW_TO (1u << 1)
@@ -402,6 +401,7 @@ static int vmx_domain_initialise(struct domain *d)
.to = vmx_ctxt_switch_to,
.tail = vmx_do_resume,
};
+ int rc;
d->arch.ctxt_switch = &csw;
@@ -411,15 +411,24 @@ static int vmx_domain_initialise(struct domain *d)
*/
d->arch.hvm.vmx.exec_sp = is_hardware_domain(d) || opt_ept_exec_sp;
+ if ( (rc = vmx_alloc_vlapic_mapping(d)) != 0 )
+ return rc;
+
return 0;
}
+static void vmx_domain_relinquish_resources(struct domain *d)
+{
+ vmx_free_vlapic_mapping(d);
+}
+
static void domain_creation_finished(struct domain *d)
{
gfn_t gfn = gaddr_to_gfn(APIC_DEFAULT_PHYS_BASE);
+ mfn_t apic_access_mfn = d->arch.hvm.vmx.apic_access_mfn;
bool ipat;
- if ( !has_vlapic(d) || mfn_eq(apic_access_mfn, INVALID_MFN) )
+ if ( mfn_eq(apic_access_mfn, _mfn(0)) )
return;
ASSERT(epte_get_entry_emt(d, gfn, apic_access_mfn, 0, &ipat,
@@ -2481,6 +2490,7 @@ static struct hvm_function_table __initdata vmx_function_table = {
.cpu_up_prepare = vmx_cpu_up_prepare,
.cpu_dead = vmx_cpu_dead,
.domain_initialise = vmx_domain_initialise,
+ .domain_relinquish_resources = vmx_domain_relinquish_resources,
.domain_creation_finished = domain_creation_finished,
.vcpu_initialise = vmx_vcpu_initialise,
.vcpu_destroy = vmx_vcpu_destroy,
@@ -2731,7 +2741,7 @@ const struct hvm_function_table * __init start_vmx(void)
{
set_in_cr4(X86_CR4_VMXE);
- if ( vmx_vmcs_init() || alloc_vlapic_mapping() )
+ if ( vmx_vmcs_init() )
{
printk("VMX: failed to initialise.\n");
return NULL;
@@ -3305,36 +3315,55 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
return X86EMUL_EXCEPTION;
}
-static int __init alloc_vlapic_mapping(void)
+static int vmx_alloc_vlapic_mapping(struct domain *d)
{
struct page_info *pg;
mfn_t mfn;
- if ( !cpu_has_vmx_virtualize_apic_accesses )
+ if ( !has_vlapic(d) || !cpu_has_vmx_virtualize_apic_accesses )
return 0;
- pg = alloc_domheap_page(NULL, 0);
+ pg = alloc_domheap_page(d, MEMF_no_refcount);
if ( !pg )
return -ENOMEM;
- /*
- * Signal to shadow code that this page cannot be refcounted. This also
- * makes epte_get_entry_emt() recognize this page as "special".
- */
- page_suppress_refcounting(pg);
+ if ( !get_page_and_type(pg, d, PGT_writable_page) )
+ {
+ /*
+ * The domain can't possibly know about this page yet, so failure
+ * here is a clear indication of something fishy going on.
+ */
+ domain_crash(d);
+ return -ENODATA;
+ }
mfn = page_to_mfn(pg);
clear_domain_page(mfn);
- apic_access_mfn = mfn;
+ d->arch.hvm.vmx.apic_access_mfn = mfn;
return 0;
}
+static void vmx_free_vlapic_mapping(struct domain *d)
+{
+ mfn_t mfn = d->arch.hvm.vmx.apic_access_mfn;
+
+ d->arch.hvm.vmx.apic_access_mfn = _mfn(0);
+ if ( !mfn_eq(mfn, _mfn(0)) )
+ {
+ struct page_info *pg = mfn_to_page(mfn);
+
+ put_page_alloc_ref(pg);
+ put_page_and_type(pg);
+ }
+}
+
static void vmx_install_vlapic_mapping(struct vcpu *v)
{
+ mfn_t apic_access_mfn = v->domain->arch.hvm.vmx.apic_access_mfn;
paddr_t virt_page_ma, apic_page_ma;
- if ( !has_vlapic(v->domain) || mfn_eq(apic_access_mfn, INVALID_MFN) )
+ if ( mfn_eq(apic_access_mfn, _mfn(0)) )
return;
ASSERT(cpu_has_vmx_virtualize_apic_accesses);
diff --git a/xen/arch/x86/mm/shadow/set.c b/xen/arch/x86/mm/shadow/set.c
index 87e9c6eeb219..bd6c68b547c9 100644
--- a/xen/arch/x86/mm/shadow/set.c
+++ b/xen/arch/x86/mm/shadow/set.c
@@ -101,14 +101,6 @@ shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d, p2m_type_t type)
owner = page_get_owner(pg);
}
- /*
- * Check whether refcounting is suppressed on this page. For example,
- * VMX'es APIC access MFN is just a surrogate page. It doesn't actually
- * get accessed, and hence there's no need to refcount it.
- */
- if ( pg && page_refcounting_suppressed(pg) )
- return 0;
-
if ( owner == dom_io )
owner = NULL;
diff --git a/xen/arch/x86/mm/shadow/types.h b/xen/arch/x86/mm/shadow/types.h
index 6970e7d6ea4a..814a4018535a 100644
--- a/xen/arch/x86/mm/shadow/types.h
+++ b/xen/arch/x86/mm/shadow/types.h
@@ -276,16 +276,9 @@ int shadow_set_l4e(struct domain *d, shadow_l4e_t *sl4e,
static void inline
shadow_put_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
{
- mfn_t mfn = shadow_l1e_get_mfn(sl1e);
-
if ( !shadow_mode_refcounts(d) )
return;
- if ( mfn_valid(mfn) &&
- /* See the respective comment in shadow_get_page_from_l1e(). */
- page_refcounting_suppressed(mfn_to_page(mfn)) )
- return;
-
put_page_from_l1e(sl1e, d);
}
diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
index 03c9ccf627ab..8073af323b96 100644
--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
@@ -58,6 +58,7 @@ struct ept_data {
#define _VMX_DOMAIN_PML_ENABLED 0
#define VMX_DOMAIN_PML_ENABLED (1ul << _VMX_DOMAIN_PML_ENABLED)
struct vmx_domain {
+ mfn_t apic_access_mfn;
/* VMX_DOMAIN_* */
unsigned int status;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 7bdf9c2290d8..e1bcea57a8f5 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -83,7 +83,7 @@
#define PGC_state_offlined PG_mask(2, 6)
#define PGC_state_free PG_mask(3, 6)
#define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st)
-/* Page is not reference counted (see below for caveats) */
+/* Page is not reference counted */
#define _PGC_extra PG_shift(7)
#define PGC_extra PG_mask(1, 7)
@@ -375,24 +375,6 @@ void zap_ro_mpt(mfn_t mfn);
bool is_iomem_page(mfn_t mfn);
-/*
- * Pages with no owner which may get passed to functions wanting to
- * refcount them can be marked PGC_extra to bypass this refcounting (which
- * would fail due to the lack of an owner).
- *
- * (For pages with owner PGC_extra has different meaning.)
- */
-static inline void page_suppress_refcounting(struct page_info *pg)
-{
- ASSERT(!page_get_owner(pg));
- pg->count_info |= PGC_extra;
-}
-
-static inline bool page_refcounting_suppressed(const struct page_info *pg)
-{
- return !page_get_owner(pg) && (pg->count_info & PGC_extra);
-}
-
struct platform_bad_page {
unsigned long mfn;
unsigned int order;