From afce7b2845e4f1fee296daa141c72e9a194554bd Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sep 26 2023 14:20:03 +0000 Subject: 3 security issues - arm32: The cache may not be properly cleaned/invalidated [XSA-437, CVE-2023-34321] - top-level shadow reference dropped too early for 64-bit PV guests [XSA-438, CVE-2023-34322] - x86/AMD: Divide speculative information leak [XSA-439, CVE-2023-20588] --- diff --git a/xen.spec b/xen.spec index dda5785..778a2e5 100644 --- a/xen.spec +++ b/xen.spec @@ -55,7 +55,7 @@ Summary: Xen is a virtual machine monitor Name: xen Version: 4.17.2 -Release: 1%{?dist} +Release: 2%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -112,6 +112,18 @@ Patch46: xen.efi.build.patch Patch47: xen.gcc13.fixes.patch Patch49: xen.python3.12.patch Patch50: xen.ocaml5.fixes.patch +Patch51: xsa437.patch +Patch52: xsa438-4.17.patch +Patch53: xsa439-0001-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch +Patch54: xsa439-0002-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch +Patch55: xsa439-0003-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch +Patch56: xsa439-0004-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch +Patch57: xsa439-0005-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch +Patch58: xsa439-0006-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch +Patch59: xsa439-0007-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch +Patch60: xsa439-0008-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch +Patch61: xsa439-0009-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch +Patch62: xsa439-0010-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch %if %build_qemutrad @@ -328,6 +340,18 @@ manage Xen virtual machines. 
%if "%dist" != ".fc38" %patch 50 -p1 %endif +%patch 51 -p1 +%patch 52 -p1 +%patch 53 -p1 +%patch 54 -p1 +%patch 55 -p1 +%patch 56 -p1 +%patch 57 -p1 +%patch 58 -p1 +%patch 59 -p1 +%patch 60 -p1 +%patch 61 -p1 +%patch 62 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -935,6 +959,13 @@ fi %endif %changelog +* Tue Sep 26 2023 Michael Young - 4.17.2-2 +- arm32: The cache may not be properly cleaned/invalidated [XSA-437, + CVE-2023-34321] +- top-level shadow reference dropped too early for 64-bit PV guests + [XSA-438, CVE-2023-34322] +- x86/AMD: Divide speculative information leak [XSA-439, CVE-2023-20588] + * Thu Aug 10 2023 Michael Young - 4.17.2-1 - update to xen-4.17.2 which includes x86/AMD: Speculative Return Stack Overflow [XSA-434, CVE-2023-20569] diff --git a/xsa437.patch b/xsa437.patch new file mode 100644 index 0000000..18c9f8f --- /dev/null +++ b/xsa437.patch @@ -0,0 +1,110 @@ +From 7fac5971340a13ca9458195305bcfe14df2e52d2 Mon Sep 17 00:00:00 2001 +From: Stefano Stabellini +Date: Thu, 17 Aug 2023 13:41:35 +0100 +Subject: [PATCH] xen/arm: page: Handle cache flush of an element at the top of + the address space + +The region that needs to be cleaned/invalidated may be at the top +of the address space. This means that 'end' (i.e. 'p + size') will +be 0 and therefore nothing will be cleaned/invalidated as the check +in the loop will always be false. + +On Arm64, we only support up to 48-bit Virtual +address space. So this is not a concern there. However, for 32-bit, +the mapcache is using the last 2GB of the address space. Therefore +we may not clean/invalidate properly some pages. This could lead +to memory corruption or data leakage (the scrubbed value may +still sit in the cache when the guest could read directly the memory +and therefore read the old content). 
+ +Rework invalidate_dcache_va_range(), clean_dcache_va_range(), +clean_and_invalidate_dcache_va_range() to handle a cache flush +with an element at the top of the address space. + +This is CVE-2023-34321 / XSA-437. + +Reported-by: Julien Grall +Signed-off-by: Stefano Stabellini +Signed-off-by: Julien Grall +Acked-by: Bertrand Marquis + +--- + xen/arch/arm/include/asm/page.h | 33 ++++++++++++++++++++------------- + 1 file changed, 20 insertions(+), 13 deletions(-) + +diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h +index e7cd62190c7f..d7fe770a5e49 100644 +--- a/xen/arch/arm/include/asm/page.h ++++ b/xen/arch/arm/include/asm/page.h +@@ -160,26 +160,25 @@ static inline size_t read_dcache_line_bytes(void) + + static inline int invalidate_dcache_va_range(const void *p, unsigned long size) + { +- const void *end = p + size; + size_t cacheline_mask = dcache_line_bytes - 1; + + dsb(sy); /* So the CPU issues all writes to the range */ + + if ( (uintptr_t)p & cacheline_mask ) + { ++ size -= dcache_line_bytes - ((uintptr_t)p & cacheline_mask); + p = (void *)((uintptr_t)p & ~cacheline_mask); + asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p)); + p += dcache_line_bytes; + } +- if ( (uintptr_t)end & cacheline_mask ) +- { +- end = (void *)((uintptr_t)end & ~cacheline_mask); +- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (end)); +- } + +- for ( ; p < end; p += dcache_line_bytes ) ++ for ( ; size >= dcache_line_bytes; ++ p += dcache_line_bytes, size -= dcache_line_bytes ) + asm volatile (__invalidate_dcache_one(0) : : "r" (p)); + ++ if ( size > 0 ) ++ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p)); ++ + dsb(sy); /* So we know the flushes happen before continuing */ + + return 0; +@@ -187,10 +186,14 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size) + + static inline int clean_dcache_va_range(const void *p, unsigned long size) + { +- const void *end = p + size; ++ size_t 
cacheline_mask = dcache_line_bytes - 1; ++ + dsb(sy); /* So the CPU issues all writes to the range */ +- p = (void *)((uintptr_t)p & ~(dcache_line_bytes - 1)); +- for ( ; p < end; p += dcache_line_bytes ) ++ size += (uintptr_t)p & cacheline_mask; ++ size = (size + cacheline_mask) & ~cacheline_mask; ++ p = (void *)((uintptr_t)p & ~cacheline_mask); ++ for ( ; size >= dcache_line_bytes; ++ p += dcache_line_bytes, size -= dcache_line_bytes ) + asm volatile (__clean_dcache_one(0) : : "r" (p)); + dsb(sy); /* So we know the flushes happen before continuing */ + /* ARM callers assume that dcache_* functions cannot fail. */ +@@ -200,10 +203,14 @@ static inline int clean_dcache_va_range(const void *p, unsigned long size) + static inline int clean_and_invalidate_dcache_va_range + (const void *p, unsigned long size) + { +- const void *end = p + size; ++ size_t cacheline_mask = dcache_line_bytes - 1; ++ + dsb(sy); /* So the CPU issues all writes to the range */ +- p = (void *)((uintptr_t)p & ~(dcache_line_bytes - 1)); +- for ( ; p < end; p += dcache_line_bytes ) ++ size += (uintptr_t)p & cacheline_mask; ++ size = (size + cacheline_mask) & ~cacheline_mask; ++ p = (void *)((uintptr_t)p & ~cacheline_mask); ++ for ( ; size >= dcache_line_bytes; ++ p += dcache_line_bytes, size -= dcache_line_bytes ) + asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p)); + dsb(sy); /* So we know the flushes happen before continuing */ + /* ARM callers assume that dcache_* functions cannot fail. */ +-- +2.40.1 + diff --git a/xsa438-4.17.patch b/xsa438-4.17.patch new file mode 100644 index 0000000..12d6ec7 --- /dev/null +++ b/xsa438-4.17.patch @@ -0,0 +1,416 @@ +From: Jan Beulich +Subject: x86/shadow: defer releasing of PV's top-level shadow reference + +sh_set_toplevel_shadow() re-pinning the top-level shadow we may be +running on is not enough (and at the same time unnecessary when the +shadow isn't what we're running on): That shadow becomes eligible for +blowing away (from e.g. 
shadow_prealloc()) immediately after the +paging lock was dropped. Yet it needs to remain valid until the actual +page table switch occurred. + +Propagate up the call chain the shadow entry that needs releasing +eventually, and carry out the release immediately after switching page +tables. Handle update_cr3() failures by switching to idle pagetables. +Note that various further uses of update_cr3() are HVM-only or only act +on paused vCPU-s, in which case sh_set_toplevel_shadow() will not defer +releasing of the reference. + +While changing the update_cr3() hook, also convert the "do_locking" +parameter to boolean. + +This is CVE-2023-34322 / XSA-438. + +Reported-by: Tim Deegan +Signed-off-by: Jan Beulich +Reviewed-by: George Dunlap + +--- a/xen/arch/x86/include/asm/mm.h ++++ b/xen/arch/x86/include/asm/mm.h +@@ -552,7 +552,7 @@ void audit_domains(void); + #endif + + void make_cr3(struct vcpu *v, mfn_t mfn); +-void update_cr3(struct vcpu *v); ++pagetable_t update_cr3(struct vcpu *v); + int vcpu_destroy_pagetables(struct vcpu *); + void *do_page_walk(struct vcpu *v, unsigned long addr); + +--- a/xen/arch/x86/include/asm/paging.h ++++ b/xen/arch/x86/include/asm/paging.h +@@ -138,7 +138,7 @@ struct paging_mode { + paddr_t ga, uint32_t *pfec, + unsigned int *page_order); + #endif +- void (*update_cr3 )(struct vcpu *v, int do_locking, ++ pagetable_t (*update_cr3 )(struct vcpu *v, bool do_locking, + bool noflush); + void (*update_paging_modes )(struct vcpu *v); + bool (*flush_tlb )(const unsigned long *vcpu_bitmap); +@@ -310,9 +310,9 @@ static inline unsigned long paging_ga_to + /* Update all the things that are derived from the guest's CR3. 
+ * Called when the guest changes CR3; the caller can then use v->arch.cr3 + * as the value to load into the host CR3 to schedule this vcpu */ +-static inline void paging_update_cr3(struct vcpu *v, bool noflush) ++static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush) + { +- paging_get_hostmode(v)->update_cr3(v, 1, noflush); ++ return paging_get_hostmode(v)->update_cr3(v, 1, noflush); + } + + /* Update all the things that are derived from the guest's CR0/CR3/CR4. +--- a/xen/arch/x86/include/asm/shadow.h ++++ b/xen/arch/x86/include/asm/shadow.h +@@ -99,6 +99,9 @@ int shadow_set_allocation(struct domain + + int shadow_get_allocation_bytes(struct domain *d, uint64_t *size); + ++/* Helper to invoke for deferred releasing of a top-level shadow's reference. */ ++void shadow_put_top_level(struct domain *d, pagetable_t old); ++ + #else /* !CONFIG_SHADOW_PAGING */ + + #define shadow_vcpu_teardown(v) ASSERT(is_pv_vcpu(v)) +@@ -121,6 +124,11 @@ static inline void shadow_prepare_page_t + + static inline void shadow_blow_tables_per_domain(struct domain *d) {} + ++static inline void shadow_put_top_level(struct domain *d, pagetable_t old) ++{ ++ ASSERT_UNREACHABLE(); ++} ++ + static inline int shadow_domctl(struct domain *d, + struct xen_domctl_shadow_op *sc, + XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) +--- a/xen/arch/x86/mm/hap/hap.c ++++ b/xen/arch/x86/mm/hap/hap.c +@@ -739,11 +739,13 @@ static bool cf_check hap_invlpg(struct v + return 1; + } + +-static void cf_check hap_update_cr3( +- struct vcpu *v, int do_locking, bool noflush) ++static pagetable_t cf_check hap_update_cr3( ++ struct vcpu *v, bool do_locking, bool noflush) + { + v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3]; + hvm_update_guest_cr3(v, noflush); ++ ++ return pagetable_null(); + } + + static bool flush_vcpu(const struct vcpu *v, const unsigned long *vcpu_bitmap) +--- a/xen/arch/x86/mm/shadow/common.c ++++ b/xen/arch/x86/mm/shadow/common.c +@@ -2590,13 +2590,13 @@ void cf_check 
shadow_update_paging_modes + } + + /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */ +-void sh_set_toplevel_shadow(struct vcpu *v, +- unsigned int slot, +- mfn_t gmfn, +- unsigned int root_type, +- mfn_t (*make_shadow)(struct vcpu *v, +- mfn_t gmfn, +- uint32_t shadow_type)) ++pagetable_t sh_set_toplevel_shadow(struct vcpu *v, ++ unsigned int slot, ++ mfn_t gmfn, ++ unsigned int root_type, ++ mfn_t (*make_shadow)(struct vcpu *v, ++ mfn_t gmfn, ++ uint32_t shadow_type)) + { + mfn_t smfn; + pagetable_t old_entry, new_entry; +@@ -2653,20 +2653,37 @@ void sh_set_toplevel_shadow(struct vcpu + mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry))); + v->arch.paging.shadow.shadow_table[slot] = new_entry; + +- /* Decrement the refcount of the old contents of this slot */ +- if ( !pagetable_is_null(old_entry) ) ++ /* ++ * Decrement the refcount of the old contents of this slot, unless ++ * we're still running on that shadow - in that case it'll need holding ++ * on to until the actual page table switch did occur. ++ */ ++ if ( !pagetable_is_null(old_entry) && (v != current || !is_pv_domain(d)) ) + { +- mfn_t old_smfn = pagetable_get_mfn(old_entry); +- /* Need to repin the old toplevel shadow if it's been unpinned +- * by shadow_prealloc(): in PV mode we're still running on this +- * shadow and it's not safe to free it yet. */ +- if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(d, old_smfn) ) +- { +- printk(XENLOG_G_ERR "can't re-pin %"PRI_mfn"\n", mfn_x(old_smfn)); +- domain_crash(d); +- } +- sh_put_ref(d, old_smfn, 0); ++ sh_put_ref(d, pagetable_get_mfn(old_entry), 0); ++ old_entry = pagetable_null(); + } ++ ++ /* ++ * 2- and 3-level shadow mode is used for HVM only. Therefore we never run ++ * on such a shadow, so only call sites requesting an L4 shadow need to pay ++ * attention to the returned value. 
++ */ ++ ASSERT(pagetable_is_null(old_entry) || root_type == SH_type_l4_64_shadow); ++ ++ return old_entry; ++} ++ ++/* ++ * Helper invoked when releasing of a top-level shadow's reference was ++ * deferred in sh_set_toplevel_shadow() above. ++ */ ++void shadow_put_top_level(struct domain *d, pagetable_t old_entry) ++{ ++ ASSERT(!pagetable_is_null(old_entry)); ++ paging_lock(d); ++ sh_put_ref(d, pagetable_get_mfn(old_entry), 0); ++ paging_unlock(d); + } + + /**************************************************************************/ +--- a/xen/arch/x86/mm/shadow/multi.c ++++ b/xen/arch/x86/mm/shadow/multi.c +@@ -3224,7 +3224,8 @@ static void cf_check sh_detach_old_table + } + } + +-static void cf_check sh_update_cr3(struct vcpu *v, int do_locking, bool noflush) ++static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking, ++ bool noflush) + /* Updates vcpu->arch.cr3 after the guest has changed CR3. + * Paravirtual guests should set v->arch.guest_table (and guest_table_user, + * if appropriate). 
+@@ -3238,6 +3239,7 @@ static void cf_check sh_update_cr3(struc + { + struct domain *d = v->domain; + mfn_t gmfn; ++ pagetable_t old_entry = pagetable_null(); + #if GUEST_PAGING_LEVELS == 3 + const guest_l3e_t *gl3e; + unsigned int i, guest_idx; +@@ -3247,7 +3249,7 @@ static void cf_check sh_update_cr3(struc + if ( !is_hvm_domain(d) && !v->is_initialised ) + { + ASSERT(v->arch.cr3 == 0); +- return; ++ return old_entry; + } + + if ( do_locking ) paging_lock(v->domain); +@@ -3320,11 +3322,12 @@ static void cf_check sh_update_cr3(struc + #if GUEST_PAGING_LEVELS == 4 + if ( sh_remove_write_access(d, gmfn, 4, 0) != 0 ) + guest_flush_tlb_mask(d, d->dirty_cpumask); +- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, sh_make_shadow); ++ old_entry = sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l4_shadow, ++ sh_make_shadow); + if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) ) + { + ASSERT(d->is_dying || d->is_shutting_down); +- return; ++ return old_entry; + } + if ( !shadow_mode_external(d) && !is_pv_32bit_domain(d) ) + { +@@ -3368,24 +3371,30 @@ static void cf_check sh_update_cr3(struc + gl2gfn = guest_l3e_get_gfn(gl3e[i]); + gl2mfn = get_gfn_query_unlocked(d, gfn_x(gl2gfn), &p2mt); + if ( p2m_is_ram(p2mt) ) +- sh_set_toplevel_shadow(v, i, gl2mfn, SH_type_l2_shadow, +- sh_make_shadow); ++ old_entry = sh_set_toplevel_shadow(v, i, gl2mfn, ++ SH_type_l2_shadow, ++ sh_make_shadow); + else +- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, +- sh_make_shadow); ++ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, ++ sh_make_shadow); + } + else +- sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, sh_make_shadow); ++ old_entry = sh_set_toplevel_shadow(v, i, INVALID_MFN, 0, ++ sh_make_shadow); ++ ++ ASSERT(pagetable_is_null(old_entry)); + } + } + #elif GUEST_PAGING_LEVELS == 2 + if ( sh_remove_write_access(d, gmfn, 2, 0) != 0 ) + guest_flush_tlb_mask(d, d->dirty_cpumask); +- sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, sh_make_shadow); ++ old_entry 
= sh_set_toplevel_shadow(v, 0, gmfn, SH_type_l2_shadow, ++ sh_make_shadow); ++ ASSERT(pagetable_is_null(old_entry)); + if ( unlikely(pagetable_is_null(v->arch.paging.shadow.shadow_table[0])) ) + { + ASSERT(d->is_dying || d->is_shutting_down); +- return; ++ return old_entry; + } + #else + #error This should never happen +@@ -3473,6 +3482,8 @@ static void cf_check sh_update_cr3(struc + + /* Release the lock, if we took it (otherwise it's the caller's problem) */ + if ( do_locking ) paging_unlock(v->domain); ++ ++ return old_entry; + } + + +--- a/xen/arch/x86/mm/shadow/none.c ++++ b/xen/arch/x86/mm/shadow/none.c +@@ -52,9 +52,11 @@ static unsigned long cf_check _gva_to_gf + } + #endif + +-static void cf_check _update_cr3(struct vcpu *v, int do_locking, bool noflush) ++static pagetable_t cf_check _update_cr3(struct vcpu *v, bool do_locking, ++ bool noflush) + { + ASSERT_UNREACHABLE(); ++ return pagetable_null(); + } + + static void cf_check _update_paging_modes(struct vcpu *v) +--- a/xen/arch/x86/mm/shadow/private.h ++++ b/xen/arch/x86/mm/shadow/private.h +@@ -391,13 +391,13 @@ mfn_t shadow_alloc(struct domain *d, + void shadow_free(struct domain *d, mfn_t smfn); + + /* Set up the top-level shadow and install it in slot 'slot' of shadow_table */ +-void sh_set_toplevel_shadow(struct vcpu *v, +- unsigned int slot, +- mfn_t gmfn, +- unsigned int root_type, +- mfn_t (*make_shadow)(struct vcpu *v, +- mfn_t gmfn, +- uint32_t shadow_type)); ++pagetable_t sh_set_toplevel_shadow(struct vcpu *v, ++ unsigned int slot, ++ mfn_t gmfn, ++ unsigned int root_type, ++ mfn_t (*make_shadow)(struct vcpu *v, ++ mfn_t gmfn, ++ uint32_t shadow_type)); + + /* Update the shadows in response to a pagetable write from Xen */ + int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size); +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -567,15 +567,12 @@ void write_ptbase(struct vcpu *v) + * + * Update ref counts to shadow tables appropriately. 
+ */ +-void update_cr3(struct vcpu *v) ++pagetable_t update_cr3(struct vcpu *v) + { + mfn_t cr3_mfn; + + if ( paging_mode_enabled(v->domain) ) +- { +- paging_update_cr3(v, false); +- return; +- } ++ return paging_update_cr3(v, false); + + if ( !(v->arch.flags & TF_kernel_mode) ) + cr3_mfn = pagetable_get_mfn(v->arch.guest_table_user); +@@ -583,6 +580,8 @@ void update_cr3(struct vcpu *v) + cr3_mfn = pagetable_get_mfn(v->arch.guest_table); + + make_cr3(v, cr3_mfn); ++ ++ return pagetable_null(); + } + + static inline void set_tlbflush_timestamp(struct page_info *page) +@@ -3285,6 +3284,7 @@ int new_guest_cr3(mfn_t mfn) + struct domain *d = curr->domain; + int rc; + mfn_t old_base_mfn; ++ pagetable_t old_shadow; + + if ( is_pv_32bit_domain(d) ) + { +@@ -3352,9 +3352,22 @@ int new_guest_cr3(mfn_t mfn) + if ( !VM_ASSIST(d, m2p_strict) ) + fill_ro_mpt(mfn); + curr->arch.guest_table = pagetable_from_mfn(mfn); +- update_cr3(curr); ++ old_shadow = update_cr3(curr); ++ ++ /* ++ * In shadow mode update_cr3() can fail, in which case here we're still ++ * running on the prior top-level shadow (which we're about to release). ++ * Switch to the idle page tables in such an event; the guest will have ++ * been crashed already. 
++ */ ++ if ( likely(!mfn_eq(pagetable_get_mfn(old_shadow), ++ maddr_to_mfn(curr->arch.cr3 & ~X86_CR3_NOFLUSH))) ) ++ write_ptbase(curr); ++ else ++ write_ptbase(idle_vcpu[curr->processor]); + +- write_ptbase(curr); ++ if ( !pagetable_is_null(old_shadow) ) ++ shadow_put_top_level(d, old_shadow); + + if ( likely(mfn_x(old_base_mfn) != 0) ) + { +--- a/xen/arch/x86/pv/domain.c ++++ b/xen/arch/x86/pv/domain.c +@@ -424,10 +424,13 @@ bool __init xpti_pcid_enabled(void) + + static void _toggle_guest_pt(struct vcpu *v) + { ++ bool guest_update; ++ pagetable_t old_shadow; + unsigned long cr3; + + v->arch.flags ^= TF_kernel_mode; +- update_cr3(v); ++ guest_update = v->arch.flags & TF_kernel_mode; ++ old_shadow = update_cr3(v); + + /* + * Don't flush user global mappings from the TLB. Don't tick TLB clock. +@@ -436,13 +439,31 @@ static void _toggle_guest_pt(struct vcpu + * TLB flush (for just the incoming PCID), as the top level page table may + * have changed behind our backs. To be on the safe side, suppress the + * no-flush unconditionally in this case. ++ * ++ * Furthermore in shadow mode update_cr3() can fail, in which case here ++ * we're still running on the prior top-level shadow (which we're about ++ * to release). Switch to the idle page tables in such an event; the ++ * guest will have been crashed already. + */ + cr3 = v->arch.cr3; + if ( shadow_mode_enabled(v->domain) ) ++ { + cr3 &= ~X86_CR3_NOFLUSH; ++ ++ if ( unlikely(mfn_eq(pagetable_get_mfn(old_shadow), ++ maddr_to_mfn(cr3))) ) ++ { ++ cr3 = idle_vcpu[v->processor]->arch.cr3; ++ /* Also suppress runstate/time area updates below. 
*/ ++ guest_update = false; ++ } ++ } + write_cr3(cr3); + +- if ( !(v->arch.flags & TF_kernel_mode) ) ++ if ( !pagetable_is_null(old_shadow) ) ++ shadow_put_top_level(v->domain, old_shadow); ++ ++ if ( !guest_update ) + return; + + if ( v->arch.pv.need_update_runstate_area && update_runstate_area(v) ) diff --git a/xsa439-0001-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch b/xsa439-0001-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch new file mode 100644 index 0000000..96e56ec --- /dev/null +++ b/xsa439-0001-x86-AMD-extend-Zenbleed-check-to-models-good-ucode-i.patch @@ -0,0 +1,49 @@ +From d2d2dcae879c6cc05227c9620f0a772f35fe6886 Mon Sep 17 00:00:00 2001 +Message-ID: +From: Jan Beulich +Date: Wed, 23 Aug 2023 09:26:36 +0200 +Subject: [XEN PATCH 01/10] x86/AMD: extend Zenbleed check to models "good" + ucode isn't known for + +Reportedly the AMD Custom APU 0405 found on SteamDeck, models 0x90 and +0x91, (quoting the respective Linux commit) is similarly affected. Put +another instance of our Zen1 vs Zen2 distinction checks in +amd_check_zenbleed(), forcing use of the chickenbit irrespective of +ucode version (building upon real hardware never surfacing a version of +0xffffffff). + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +(cherry picked from commit 145a69c0944ac70cfcf9d247c85dee9e99d9d302) +--- + xen/arch/x86/cpu/amd.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index 3ea214fc2e..1bb3044be1 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -909,10 +909,17 @@ void amd_check_zenbleed(void) + case 0xa0 ... 0xaf: good_rev = 0x08a00008; break; + default: + /* +- * With the Fam17h check above, parts getting here are Zen1. +- * They're not affected. ++ * With the Fam17h check above, most parts getting here are ++ * Zen1. They're not affected. Assume Zen2 ones making it ++ * here are affected regardless of microcode version. 
++ * ++ * Zen1 vs Zen2 isn't a simple model number comparison, so use ++ * STIBP as a heuristic to distinguish. + */ +- return; ++ if (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) ++ return; ++ good_rev = ~0U; ++ break; + } + + rdmsrl(MSR_AMD64_DE_CFG, val); +-- +2.41.0 + diff --git a/xsa439-0002-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch b/xsa439-0002-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch new file mode 100644 index 0000000..8b8e30a --- /dev/null +++ b/xsa439-0002-x86-spec-ctrl-Fix-confusion-between-SPEC_CTRL_EXIT_T.patch @@ -0,0 +1,77 @@ +From dc28aba565f226f9bec24cfde993e78478acfb4e Mon Sep 17 00:00:00 2001 +Message-ID: +In-Reply-To: +References: +From: Andrew Cooper +Date: Tue, 12 Sep 2023 15:06:49 +0100 +Subject: [XEN PATCH 02/10] x86/spec-ctrl: Fix confusion between + SPEC_CTRL_EXIT_TO_XEN{,_IST} + +c/s 3fffaf9c13e9 ("x86/entry: Avoid using alternatives in NMI/#MC paths") +dropped the only user, leaving behind the (incorrect) implication that Xen had +split exit paths. + +Delete the unused SPEC_CTRL_EXIT_TO_XEN and rename SPEC_CTRL_EXIT_TO_XEN_IST +to SPEC_CTRL_EXIT_TO_XEN for consistency. + +No functional change. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 1c18d73774533a55ba9d1cbee8bdace03efdb5e7) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 10 ++-------- + xen/arch/x86/x86_64/entry.S | 2 +- + 2 files changed, 3 insertions(+), 9 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index f23bb105c5..e8fd01243c 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -79,7 +79,6 @@ + * - SPEC_CTRL_ENTRY_FROM_PV + * - SPEC_CTRL_ENTRY_FROM_INTR + * - SPEC_CTRL_ENTRY_FROM_INTR_IST +- * - SPEC_CTRL_EXIT_TO_XEN_IST + * - SPEC_CTRL_EXIT_TO_XEN + * - SPEC_CTRL_EXIT_TO_PV + * +@@ -268,11 +267,6 @@ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ + X86_FEATURE_SC_MSR_PV + +-/* Use when exiting to Xen context. */ +-#define SPEC_CTRL_EXIT_TO_XEN \ +- ALTERNATIVE "", \ +- DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR_PV +- + /* Use when exiting to PV guest context. */ + #define SPEC_CTRL_EXIT_TO_PV \ + ALTERNATIVE "", \ +@@ -339,8 +333,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + UNLIKELY_END(\@_serialise) + .endm + +-/* Use when exiting to Xen in IST context. */ +-.macro SPEC_CTRL_EXIT_TO_XEN_IST ++/* Use when exiting to Xen context. */ ++.macro SPEC_CTRL_EXIT_TO_XEN + /* + * Requires %rbx=stack_end + * Clobbers %rax, %rcx, %rdx +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 7675a59ff0..b45a09823a 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -673,7 +673,7 @@ UNLIKELY_START(ne, exit_cr3) + UNLIKELY_END(exit_cr3) + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. 
*/ +- SPEC_CTRL_EXIT_TO_XEN_IST /* Req: %rbx=end, Clob: acd */ ++ SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ + + RESTORE_ALL adj=8 + iretq +-- +2.41.0 + diff --git a/xsa439-0003-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch b/xsa439-0003-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch new file mode 100644 index 0000000..547032e --- /dev/null +++ b/xsa439-0003-x86-spec-ctrl-Fold-DO_SPEC_CTRL_EXIT_TO_XEN-into-it-.patch @@ -0,0 +1,88 @@ +From 84690fb82c4f4aecb72a6789d8994efa74841e09 Mon Sep 17 00:00:00 2001 +Message-ID: <84690fb82c4f4aecb72a6789d8994efa74841e09.1695733540.git.m.a.young@durham.ac.uk> +In-Reply-To: +References: +From: Andrew Cooper +Date: Tue, 12 Sep 2023 17:03:16 +0100 +Subject: [XEN PATCH 03/10] x86/spec-ctrl: Fold DO_SPEC_CTRL_EXIT_TO_XEN into + it's single user + +With the SPEC_CTRL_EXIT_TO_XEN{,_IST} confusion fixed, it's now obvious that +there's only a single EXIT_TO_XEN path. Fold DO_SPEC_CTRL_EXIT_TO_XEN into +SPEC_CTRL_EXIT_TO_XEN to simplify further fixes. + +When merging labels, switch the name to .L\@_skip_sc_msr as "skip" on its own +is going to be too generic shortly. + +No functional change. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 694bb0f280fd08a4377e36e32b84b5062def4de2) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 40 ++++++++++-------------- + 1 file changed, 16 insertions(+), 24 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index e8fd01243c..d5f65d80ea 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -211,27 +211,6 @@ + wrmsr + .endm + +-.macro DO_SPEC_CTRL_EXIT_TO_XEN +-/* +- * Requires %rbx=stack_end +- * Clobbers %rax, %rcx, %rdx +- * +- * When returning to Xen context, look to see whether SPEC_CTRL shadowing is +- * in effect, and reload the shadow value. 
This covers race conditions which +- * exist with an NMI/MCE/etc hitting late in the return-to-guest path. +- */ +- xor %edx, %edx +- +- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) +- jz .L\@_skip +- +- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax +- mov $MSR_SPEC_CTRL, %ecx +- wrmsr +- +-.L\@_skip: +-.endm +- + .macro DO_SPEC_CTRL_EXIT_TO_GUEST + /* + * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo +@@ -340,11 +319,24 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + * Clobbers %rax, %rcx, %rdx + */ + testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) +- jz .L\@_skip ++ jz .L\@_skip_sc_msr + +- DO_SPEC_CTRL_EXIT_TO_XEN ++ /* ++ * When returning to Xen context, look to see whether SPEC_CTRL shadowing ++ * is in effect, and reload the shadow value. This covers race conditions ++ * which exist with an NMI/MCE/etc hitting late in the return-to-guest ++ * path. ++ */ ++ xor %edx, %edx + +-.L\@_skip: ++ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) ++ jz .L\@_skip_sc_msr ++ ++ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax ++ mov $MSR_SPEC_CTRL, %ecx ++ wrmsr ++ ++.L\@_skip_sc_msr: + .endm + + #endif /* __ASSEMBLY__ */ +-- +2.41.0 + diff --git a/xsa439-0004-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch b/xsa439-0004-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch new file mode 100644 index 0000000..3350750 --- /dev/null +++ b/xsa439-0004-x86-spec-ctrl-Turn-the-remaining-SPEC_CTRL_-ENTRY-EX.patch @@ -0,0 +1,86 @@ +From 3952c73bdbd05f0e666986fce633a591237b3c88 Mon Sep 17 00:00:00 2001 +Message-ID: <3952c73bdbd05f0e666986fce633a591237b3c88.1695733540.git.m.a.young@durham.ac.uk> +In-Reply-To: +References: +From: Andrew Cooper +Date: Fri, 1 Sep 2023 11:38:44 +0100 +Subject: [XEN PATCH 04/10] x86/spec-ctrl: Turn the remaining + SPEC_CTRL_{ENTRY,EXIT}_* into asm macros + +These have grown more complex over time, with some already having been +converted. 
+ +Provide full Requires/Clobbers comments, otherwise missing at this level of +indirection. + +No functional change. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 7125429aafb9e3c9c88fc93001fc2300e0ac2cc8) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 37 ++++++++++++++++++------ + 1 file changed, 28 insertions(+), 9 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index d5f65d80ea..c6d5f2ad01 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -231,26 +231,45 @@ + .endm + + /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ +-#define SPEC_CTRL_ENTRY_FROM_PV \ ++.macro SPEC_CTRL_ENTRY_FROM_PV ++/* ++ * Requires %rsp=regs/cpuinfo, %rdx=0 ++ * Clobbers %rax, %rcx, %rdx ++ */ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ +- X86_FEATURE_IBPB_ENTRY_PV; \ +- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ ++ X86_FEATURE_IBPB_ENTRY_PV ++ ++ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV ++ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ + X86_FEATURE_SC_MSR_PV ++.endm + + /* Use in interrupt/exception context. May interrupt Xen or PV context. */ +-#define SPEC_CTRL_ENTRY_FROM_INTR \ ++.macro SPEC_CTRL_ENTRY_FROM_INTR ++/* ++ * Requires %rsp=regs, %r14=stack_end, %rdx=0 ++ * Clobbers %rax, %rcx, %rdx ++ */ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ +- X86_FEATURE_IBPB_ENTRY_PV; \ +- ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ ++ X86_FEATURE_IBPB_ENTRY_PV ++ ++ ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV ++ + ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ + X86_FEATURE_SC_MSR_PV ++.endm + + /* Use when exiting to PV guest context. 
*/ +-#define SPEC_CTRL_EXIT_TO_PV \ +- ALTERNATIVE "", \ +- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \ ++.macro SPEC_CTRL_EXIT_TO_PV ++/* ++ * Requires %rax=spec_ctrl, %rsp=regs/info ++ * Clobbers %rcx, %rdx ++ */ ++ ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV ++ + DO_SPEC_CTRL_COND_VERW ++.endm + + /* + * Use in IST interrupt/exception context. May interrupt Xen or PV context. +-- +2.41.0 + diff --git a/xsa439-0005-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch b/xsa439-0005-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch new file mode 100644 index 0000000..dda088a --- /dev/null +++ b/xsa439-0005-x86-spec-ctrl-Improve-all-SPEC_CTRL_-ENTER-EXIT-_-co.patch @@ -0,0 +1,109 @@ +From ba023e93d0b1e60b80251bf080bab694efb9f8e3 Mon Sep 17 00:00:00 2001 +Message-ID: +In-Reply-To: +References: +From: Andrew Cooper +Date: Wed, 30 Aug 2023 20:11:50 +0100 +Subject: [XEN PATCH 05/10] x86/spec-ctrl: Improve all SPEC_CTRL_{ENTER,EXIT}_* + comments + +... to better explain how they're used. + +Doing so highlights that SPEC_CTRL_EXIT_TO_XEN is missing a VERW flush for the +corner case when e.g. an NMI hits late in an exit-to-guest path. + +Leave a TODO, which will be addressed in subsequent patches which arrange for +VERW flushing to be safe within SPEC_CTRL_EXIT_TO_XEN. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 45f00557350dc7d0756551069803fc49c29184ca) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 36 ++++++++++++++++++++---- + 1 file changed, 31 insertions(+), 5 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index c6d5f2ad01..97c4db31cd 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -230,7 +230,10 @@ + wrmsr + .endm + +-/* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). 
*/ ++/* ++ * Used after an entry from PV context: SYSCALL, SYSENTER, INT, ++ * etc. There is always a guest speculation state in context. ++ */ + .macro SPEC_CTRL_ENTRY_FROM_PV + /* + * Requires %rsp=regs/cpuinfo, %rdx=0 +@@ -245,7 +248,11 @@ + X86_FEATURE_SC_MSR_PV + .endm + +-/* Use in interrupt/exception context. May interrupt Xen or PV context. */ ++/* ++ * Used after an exception or maskable interrupt, hitting Xen or PV context. ++ * There will either be a guest speculation context, or (barring fatal ++ * exceptions) a well-formed Xen speculation context. ++ */ + .macro SPEC_CTRL_ENTRY_FROM_INTR + /* + * Requires %rsp=regs, %r14=stack_end, %rdx=0 +@@ -260,7 +267,10 @@ + X86_FEATURE_SC_MSR_PV + .endm + +-/* Use when exiting to PV guest context. */ ++/* ++ * Used when exiting from any entry context, back to PV context. This ++ * includes from an IST entry which moved onto the primary stack. ++ */ + .macro SPEC_CTRL_EXIT_TO_PV + /* + * Requires %rax=spec_ctrl, %rsp=regs/info +@@ -272,7 +282,13 @@ + .endm + + /* +- * Use in IST interrupt/exception context. May interrupt Xen or PV context. ++ * Used after an IST entry hitting Xen or PV context. Special care is needed, ++ * because when hitting Xen context, there may not be a well-formed ++ * speculation context. (i.e. it can hit in the middle of ++ * SPEC_CTRL_{ENTRY,EXIT}_* regions.) ++ * ++ * An IST entry which hits PV context moves onto the primary stack and leaves ++ * via SPEC_CTRL_EXIT_TO_PV, *not* SPEC_CTRL_EXIT_TO_XEN. + */ + .macro SPEC_CTRL_ENTRY_FROM_INTR_IST + /* +@@ -331,7 +347,14 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + UNLIKELY_END(\@_serialise) + .endm + +-/* Use when exiting to Xen context. */ ++/* ++ * Use when exiting from any entry context, back to Xen context. This ++ * includes returning to other SPEC_CTRL_{ENTRY,EXIT}_* regions with an ++ * incomplete speculation context. 
++ * ++ * Because we might have interrupted Xen beyond SPEC_CTRL_EXIT_TO_$GUEST, we ++ * need to treat this as if it were an EXIT_TO_$GUEST case too. ++ */ + .macro SPEC_CTRL_EXIT_TO_XEN + /* + * Requires %rbx=stack_end +@@ -356,6 +379,9 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + wrmsr + + .L\@_skip_sc_msr: ++ ++ /* TODO VERW */ ++ + .endm + + #endif /* __ASSEMBLY__ */ +-- +2.41.0 + diff --git a/xsa439-0006-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch b/xsa439-0006-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch new file mode 100644 index 0000000..e44998e --- /dev/null +++ b/xsa439-0006-x86-entry-Adjust-restore_all_xen-to-hold-stack_end-i.patch @@ -0,0 +1,77 @@ +From 5f7efd47c8273fde972637d0360851802f76eca9 Mon Sep 17 00:00:00 2001 +Message-ID: <5f7efd47c8273fde972637d0360851802f76eca9.1695733540.git.m.a.young@durham.ac.uk> +In-Reply-To: +References: +From: Andrew Cooper +Date: Wed, 13 Sep 2023 13:48:16 +0100 +Subject: [XEN PATCH 06/10] x86/entry: Adjust restore_all_xen to hold stack_end + in %r14 + +All other SPEC_CTRL_{ENTRY,EXIT}_* helpers hold stack_end in %r14. Adjust it +for consistency. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 7aa28849a1155d856e214e9a80a7e65fffdc3e58) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 8 ++++---- + xen/arch/x86/x86_64/entry.S | 8 ++++---- + 2 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index 97c4db31cd..66c706496f 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -357,10 +357,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + */ + .macro SPEC_CTRL_EXIT_TO_XEN + /* +- * Requires %rbx=stack_end ++ * Requires %r14=stack_end + * Clobbers %rax, %rcx, %rdx + */ +- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) ++ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) + jz .L\@_skip_sc_msr + + /* +@@ -371,10 +371,10 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + */ + xor %edx, %edx + +- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) ++ testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) + jz .L\@_skip_sc_msr + +- mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax ++ mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax + mov $MSR_SPEC_CTRL, %ecx + wrmsr + +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index b45a09823a..92279a225d 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -665,15 +665,15 @@ restore_all_xen: + * Check whether we need to switch to the per-CPU page tables, in + * case we return to late PV exit code (from an NMI or #MC). + */ +- GET_STACK_END(bx) +- cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%rbx) ++ GET_STACK_END(14) ++ cmpb $0, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) + UNLIKELY_START(ne, exit_cr3) +- mov STACK_CPUINFO_FIELD(pv_cr3)(%rbx), %rax ++ mov STACK_CPUINFO_FIELD(pv_cr3)(%r14), %rax + mov %rax, %cr3 + UNLIKELY_END(exit_cr3) + + /* WARNING! 
`ret`, `call *`, `jmp *` not safe beyond this point. */ +- SPEC_CTRL_EXIT_TO_XEN /* Req: %rbx=end, Clob: acd */ ++ SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */ + + RESTORE_ALL adj=8 + iretq +-- +2.41.0 + diff --git a/xsa439-0007-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch b/xsa439-0007-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch new file mode 100644 index 0000000..2e36bcc --- /dev/null +++ b/xsa439-0007-x86-entry-Track-the-IST-ness-of-an-entry-for-the-exi.patch @@ -0,0 +1,112 @@ +From e4a71bc0da0baf7464bb0d8e33053f330e5ea366 Mon Sep 17 00:00:00 2001 +Message-ID: +In-Reply-To: +References: +From: Andrew Cooper +Date: Wed, 13 Sep 2023 12:20:12 +0100 +Subject: [XEN PATCH 07/10] x86/entry: Track the IST-ness of an entry for the + exit paths + +Use %r12 to hold an ist_exit boolean. This register is zero elsewhere in the +entry/exit asm, so it only needs setting in the IST path. + +As this is subtle and fragile, add check_ist_exit() to be used in debugging +builds to cross-check that the ist_exit boolean matches the entry vector. + +Write check_ist_exit() in C, because it's debug only and the logic is more +complicated than I care to maintain in asm. + +For now, we only need to use this signal in the exit-to-Xen path, but some +exit-to-guest paths happen in IST context too. Check the correctness in all +exit paths to avoid the logic bit-rotting. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 21bdc25b05a0f8ab6bc73520a9ca01327360732c) + +x86/entry: Partially revert IST-exit checks + +The patch adding check_ist_exit() didn't account for the fact that +reset_stack_and_jump() is not an ABI-preserving boundary. The IST-ness in +%r12 doesn't survive into the next context, and is a stale value in C.
+ +This shows up in Gitlab CI for the Clang build: + + https://gitlab.com/xen-project/people/andyhhp/xen/-/jobs/5112783827 + +and in OSSTest for GCC 8: + + http://logs.test-lab.xenproject.org/osstest/logs/183045/test-amd64-amd64-xl-qemuu-debianhvm-amd64/serial-pinot0.log + +There's no straightforward way to reconstruct the IST-exit-ness on the +exit-to-guest path after a context switch. For now, we only need IST-exit on +the return-to-Xen path. + +Fixes: 21bdc25b05a0 ("x86/entry: Track the IST-ness of an entry for the exit paths") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 9b57c800b79b96769ea3dcd6468578fa664d19f9) +--- + xen/arch/x86/traps.c | 13 +++++++++++++ + xen/arch/x86/x86_64/entry.S | 13 ++++++++++++- + 2 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c +index d12004b1c6..e65cc60041 100644 +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -2315,6 +2315,19 @@ void asm_domain_crash_synchronous(unsigned long addr) + do_softirq(); + } + ++#ifdef CONFIG_DEBUG ++void check_ist_exit(const struct cpu_user_regs *regs, bool ist_exit) ++{ ++ const unsigned int ist_mask = ++ (1U << X86_EXC_NMI) | (1U << X86_EXC_DB) | ++ (1U << X86_EXC_DF) | (1U << X86_EXC_MC); ++ uint8_t ev = regs->entry_vector; ++ bool is_ist = (ev < TRAP_nr) && ((1U << ev) & ist_mask); ++ ++ ASSERT(is_ist == ist_exit); ++} ++#endif ++ + /* + * Local variables: + * mode: C +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 92279a225d..4cebc4fbe3 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -659,8 +659,15 @@ ENTRY(early_page_fault) + .section .text.entry, "ax", @progbits + + ALIGN +-/* No special register assumptions. 
*/ ++/* %r12=ist_exit */ + restore_all_xen: ++ ++#ifdef CONFIG_DEBUG ++ mov %rsp, %rdi ++ mov %r12, %rsi ++ call check_ist_exit ++#endif ++ + /* + * Check whether we need to switch to the per-CPU page tables, in + * case we return to late PV exit code (from an NMI or #MC). +@@ -1091,6 +1098,10 @@ handle_ist_exception: + .L_ist_dispatch_done: + mov %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14) + mov %bl, STACK_CPUINFO_FIELD(use_pv_cr3)(%r14) ++ ++ /* This is an IST exit */ ++ mov $1, %r12d ++ + cmpb $TRAP_nmi,UREGS_entry_vector(%rsp) + jne ret_from_intr + +-- +2.41.0 + diff --git a/xsa439-0008-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch b/xsa439-0008-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch new file mode 100644 index 0000000..6e00ca6 --- /dev/null +++ b/xsa439-0008-x86-spec-ctrl-Issue-VERW-during-IST-exit-to-Xen.patch @@ -0,0 +1,92 @@ +From 2e2c3efcfc9f183674a8de6ed954ffbe7188b70d Mon Sep 17 00:00:00 2001 +Message-ID: <2e2c3efcfc9f183674a8de6ed954ffbe7188b70d.1695733540.git.m.a.young@durham.ac.uk> +In-Reply-To: +References: +From: Andrew Cooper +Date: Wed, 13 Sep 2023 13:53:33 +0100 +Subject: [XEN PATCH 08/10] x86/spec-ctrl: Issue VERW during IST exit to Xen + +There is a corner case where e.g. an NMI hitting an exit-to-guest path after +SPEC_CTRL_EXIT_TO_* would have run the entire NMI handler *after* the VERW +flush to scrub potentially sensitive data from uarch buffers. + +In order to compensate, issue VERW when exiting to Xen from an IST entry. + +SPEC_CTRL_EXIT_TO_XEN already has two reads of spec_ctrl_flags off the stack, +and we're about to add a third. Load the field into %ebx, and list the +register as clobbered. + +%r12 has been arranged to be the ist_exit signal, so add this as an input +dependency and use it to identify when to issue a VERW. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit 3ee6066bcd737756b0990d417d94eddc0b0d2585) +--- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 20 +++++++++++++++----- + xen/arch/x86/x86_64/entry.S | 2 +- + 2 files changed, 16 insertions(+), 6 deletions(-) + +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index 66c706496f..28a75796e6 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -357,10 +357,12 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + */ + .macro SPEC_CTRL_EXIT_TO_XEN + /* +- * Requires %r14=stack_end +- * Clobbers %rax, %rcx, %rdx ++ * Requires %r12=ist_exit, %r14=stack_end ++ * Clobbers %rax, %rbx, %rcx, %rdx + */ +- testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) ++ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx ++ ++ testb $SCF_ist_sc_msr, %bl + jz .L\@_skip_sc_msr + + /* +@@ -371,7 +373,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + */ + xor %edx, %edx + +- testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) ++ testb $SCF_use_shadow, %bl + jz .L\@_skip_sc_msr + + mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax +@@ -380,8 +382,16 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + + .L\@_skip_sc_msr: + +- /* TODO VERW */ ++ test %r12, %r12 ++ jz .L\@_skip_ist_exit ++ ++ /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */ ++ testb $SCF_verw, %bl ++ jz .L\@_skip_verw ++ verw STACK_CPUINFO_FIELD(verw_sel)(%r14) ++.L\@_skip_verw: + ++.L\@_skip_ist_exit: + .endm + + #endif /* __ASSEMBLY__ */ +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 4cebc4fbe3..c12e011b4d 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -680,7 +680,7 @@ UNLIKELY_START(ne, exit_cr3) + UNLIKELY_END(exit_cr3) + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. 
*/ +- SPEC_CTRL_EXIT_TO_XEN /* Req: %r14=end, Clob: acd */ ++ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */ + + RESTORE_ALL adj=8 + iretq +-- +2.41.0 + diff --git a/xsa439-0009-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch b/xsa439-0009-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch new file mode 100644 index 0000000..5f063b1 --- /dev/null +++ b/xsa439-0009-x86-amd-Introduce-is_zen-1-2-_uarch-predicates.patch @@ -0,0 +1,94 @@ +From 19ee1e1faa32b79274b3484cb1170a5970f1e602 Mon Sep 17 00:00:00 2001 +Message-ID: <19ee1e1faa32b79274b3484cb1170a5970f1e602.1695733540.git.m.a.young@durham.ac.uk> +In-Reply-To: +References: +From: Andrew Cooper +Date: Fri, 15 Sep 2023 12:13:51 +0100 +Subject: [XEN PATCH 09/10] x86/amd: Introduce is_zen{1,2}_uarch() predicates + +We already have 3 cases using STIBP as a Zen1/2 heuristic, and are about to +introduce a 4th. Wrap the heuristic into a pair of predicates rather than +opencoding it, and the explanation of the heuristic, at each usage site. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit de1d265001397f308c5c3c5d3ffc30e7ef8c0705) +--- + xen/arch/x86/cpu/amd.c | 18 ++++-------------- + xen/arch/x86/include/asm/amd.h | 11 +++++++++++ + 2 files changed, 15 insertions(+), 14 deletions(-) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index 1bb3044be1..e94ba5a0e0 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -855,15 +855,13 @@ void amd_set_legacy_ssbd(bool enable) + * non-branch instructions to be ignored. It is to be set unilaterally in + * newer microcode. + * +- * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a +- * simple model number comparison, so use STIBP as a heuristic to separate the +- * two uarches in Fam17h(AMD)/18h(Hygon). ++ * This chickenbit is something unrelated on Zen1. 
+ */ + void amd_init_spectral_chicken(void) + { + uint64_t val, chickenbit = 1 << 1; + +- if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP)) ++ if (cpu_has_hypervisor || !is_zen2_uarch()) + return; + + if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit)) +@@ -912,11 +910,8 @@ void amd_check_zenbleed(void) + * With the Fam17h check above, most parts getting here are + * Zen1. They're not affected. Assume Zen2 ones making it + * here are affected regardless of microcode version. +- * +- * Zen1 vs Zen2 isn't a simple model number comparison, so use +- * STIBP as a heuristic to distinguish. + */ +- if (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) ++ if (is_zen1_uarch()) + return; + good_rev = ~0U; + break; +@@ -1277,12 +1272,7 @@ static int __init cf_check zen2_c6_errata_check(void) + */ + s_time_t delta; + +- /* +- * Zen1 vs Zen2 isn't a simple model number comparison, so use STIBP as +- * a heuristic to separate the two uarches in Fam17h. +- */ +- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || +- !boot_cpu_has(X86_FEATURE_AMD_STIBP)) ++ if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch()) + return 0; + + /* +diff --git a/xen/arch/x86/include/asm/amd.h b/xen/arch/x86/include/asm/amd.h +index a975d3de26..82324110ab 100644 +--- a/xen/arch/x86/include/asm/amd.h ++++ b/xen/arch/x86/include/asm/amd.h +@@ -140,6 +140,17 @@ + AMD_MODEL_RANGE(0x11, 0x0, 0x0, 0xff, 0xf), \ + AMD_MODEL_RANGE(0x12, 0x0, 0x0, 0xff, 0xf)) + ++/* ++ * The Zen1 and Zen2 microarchitectures are implemented by AMD (Fam17h) and ++ * Hygon (Fam18h) but without simple model number rules. Instead, use STIBP ++ * as a heuristic that distinguishes the two. ++ * ++ * The caller is required to perform the appropriate vendor/family checks ++ * first. 
++ */ ++#define is_zen1_uarch() (!boot_cpu_has(X86_FEATURE_AMD_STIBP)) ++#define is_zen2_uarch() boot_cpu_has(X86_FEATURE_AMD_STIBP) ++ + struct cpuinfo_x86; + int cpu_has_amd_erratum(const struct cpuinfo_x86 *, int, ...); + +-- +2.41.0 + diff --git a/xsa439-0010-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch b/xsa439-0010-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch new file mode 100644 index 0000000..0dc6780 --- /dev/null +++ b/xsa439-0010-x86-spec-ctrl-Mitigate-the-Zen1-DIV-leakage.patch @@ -0,0 +1,231 @@ +From 9ac2f49f5fa3a5159409241d4f74fb0d721dd4c5 Mon Sep 17 00:00:00 2001 +Message-ID: <9ac2f49f5fa3a5159409241d4f74fb0d721dd4c5.1695733540.git.m.a.young@durham.ac.uk> +In-Reply-To: +References: +From: Andrew Cooper +Date: Wed, 30 Aug 2023 20:24:25 +0100 +Subject: [XEN PATCH 10/10] x86/spec-ctrl: Mitigate the Zen1 DIV leakage + +In the Zen1 microarchitecture, there is one divider in the pipeline which +services uops from both threads. In the case of #DE, the latched result from +the previous DIV to execute will be forwarded speculatively. + +This is an interesting covert channel that allows two threads to communicate +without any system calls. It also allows userspace to obtain the result of +the most recent DIV instruction executed (even speculatively) in the core, +which can be from a higher privilege context. + +Scrub the result from the divider by executing a non-faulting divide. This +needs performing on the exit-to-guest paths, and ist_exit-to-Xen. + +Alternatives in IST context is believed safe now that it's done in NMI +context. + +This is XSA-439 / CVE-2023-20588.
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +(cherry picked from commit b5926c6ecf05c28ee99c6248c42d691ccbf0c315) +--- + docs/misc/xen-command-line.pandoc | 6 ++- + xen/arch/x86/hvm/svm/entry.S | 1 + + xen/arch/x86/include/asm/cpufeatures.h | 2 +- + xen/arch/x86/include/asm/spec_ctrl_asm.h | 17 +++++++++ + xen/arch/x86/spec_ctrl.c | 48 +++++++++++++++++++++++- + 5 files changed, 71 insertions(+), 3 deletions(-) + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index d9dae740cc..b92c8f969c 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -2315,7 +2315,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). + > {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>, + > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, + > eager-fpu,l1d-flush,branch-harden,srb-lock, +-> unpriv-mmio,gds-mit}=<bool> ]` ++> unpriv-mmio,gds-mit,div-scrub}=<bool> ]` + + Controls for speculative execution sidechannel mitigations. By default, Xen + will pick the most appropriate mitigations based on compiled in support, +@@ -2437,6 +2437,10 @@ has elected not to lock the configuration, Xen will use GDS_CTRL to mitigate + GDS with. Otherwise, Xen will mitigate by disabling AVX, which blocks the use + of the AVX2 Gather instructions. + ++On all hardware, the `div-scrub=` option can be used to force or prevent Xen ++from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate ++DIV-leakage on hardware believed to be vulnerable. ++ + ### sync_console +> `= <boolean>` + +diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S +index 981cd82e7c..934f12cf5c 100644 +--- a/xen/arch/x86/hvm/svm/entry.S ++++ b/xen/arch/x86/hvm/svm/entry.S +@@ -74,6 +74,7 @@ __UNLIKELY_END(nsvm_hap) + 1: /* No Spectre v1 concerns. Execution will hit VMRUN imminently.
*/ + .endm + ALTERNATIVE "", svm_vmentry_spec_ctrl, X86_FEATURE_SC_MSR_HVM ++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV + + pop %r15 + pop %r14 +diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h +index da0593de85..c3aad21c3b 100644 +--- a/xen/arch/x86/include/asm/cpufeatures.h ++++ b/xen/arch/x86/include/asm/cpufeatures.h +@@ -35,7 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM + XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ + XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */ + XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ +-/* Bits 23 unused. */ ++XEN_CPUFEATURE(SC_DIV, X86_SYNTH(23)) /* DIV scrub needed */ + XEN_CPUFEATURE(SC_RSB_IDLE, X86_SYNTH(24)) /* RSB overwrite needed for idle. */ + XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ + XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ +diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h +index 28a75796e6..f4b8b9d956 100644 +--- a/xen/arch/x86/include/asm/spec_ctrl_asm.h ++++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h +@@ -177,6 +177,19 @@ + .L\@_verw_skip: + .endm + ++.macro DO_SPEC_CTRL_DIV ++/* ++ * Requires nothing ++ * Clobbers %rax ++ * ++ * Issue a DIV for its flushing side effect (Zen1 uarch specific). Any ++ * non-faulting DIV will do; a byte DIV has least latency, and doesn't clobber ++ * %rdx. 
++ */ ++ mov $1, %eax ++ div %al ++.endm ++ + .macro DO_SPEC_CTRL_ENTRY maybexen:req + /* + * Requires %rsp=regs (also cpuinfo if !maybexen) +@@ -279,6 +292,8 @@ + ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV + + DO_SPEC_CTRL_COND_VERW ++ ++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV + .endm + + /* +@@ -391,6 +406,8 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): + verw STACK_CPUINFO_FIELD(verw_sel)(%r14) + .L\@_skip_verw: + ++ ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV ++ + .L\@_skip_ist_exit: + .endm + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 79b98f0fe7..0ff3c895ac 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -79,6 +79,7 @@ static int8_t __initdata opt_srb_lock = -1; + static bool __initdata opt_unpriv_mmio; + static bool __ro_after_init opt_fb_clear_mmio; + static int8_t __initdata opt_gds_mit = -1; ++static int8_t __initdata opt_div_scrub = -1; + + static int __init cf_check parse_spec_ctrl(const char *s) + { +@@ -133,6 +134,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) + opt_srb_lock = 0; + opt_unpriv_mmio = false; + opt_gds_mit = 0; ++ opt_div_scrub = 0; + } + else if ( val > 0 ) + rc = -EINVAL; +@@ -285,6 +287,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) + opt_unpriv_mmio = val; + else if ( (val = parse_boolean("gds-mit", s, ss)) >= 0 ) + opt_gds_mit = val; ++ else if ( (val = parse_boolean("div-scrub", s, ss)) >= 0 ) ++ opt_div_scrub = val; + else + rc = -EINVAL; + +@@ -485,7 +489,7 @@ static void __init print_details(enum ind_thunk thunk) + "\n"); + + /* Settings for Xen's protection, irrespective of guests. */ +- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s\n", ++ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", + thunk == THUNK_NONE ? "N/A" : + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? 
"LFENCE" : +@@ -510,6 +514,7 @@ static void __init print_details(enum ind_thunk thunk) + opt_l1d_flush ? " L1D_FLUSH" : "", + opt_md_clear_pv || opt_md_clear_hvm || + opt_fb_clear_mmio ? " VERW" : "", ++ opt_div_scrub ? " DIV" : "", + opt_branch_harden ? " BRANCH_HARDEN" : ""); + + /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ +@@ -967,6 +972,45 @@ static void __init srso_calculations(bool hw_smt_enabled) + setup_force_cpu_cap(X86_FEATURE_SRSO_NO); + } + ++/* ++ * The Div leakage issue is specific to the AMD Zen1 microarchitecture. ++ * ++ * However, there's no $FOO_NO bit defined, so if we're virtualised we have no ++ * hope of spotting the case where we might move to vulnerable hardware. We ++ * also can't make any useful conclusion about SMT-ness. ++ * ++ * Don't check the hypervisor bit, so at least we do the safe thing when ++ * booting on something that looks like a Zen1 CPU. ++ */ ++static bool __init has_div_vuln(void) ++{ ++ if ( !(boot_cpu_data.x86_vendor & ++ (X86_VENDOR_AMD | X86_VENDOR_HYGON)) ) ++ return false; ++ ++ if ( boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18 ) ++ return false; ++ ++ return is_zen1_uarch(); ++} ++ ++static void __init div_calculations(bool hw_smt_enabled) ++{ ++ bool cpu_bug_div = has_div_vuln(); ++ ++ if ( opt_div_scrub == -1 ) ++ opt_div_scrub = cpu_bug_div; ++ ++ if ( opt_div_scrub ) ++ setup_force_cpu_cap(X86_FEATURE_SC_DIV); ++ ++ if ( opt_smt == -1 && !cpu_has_hypervisor && cpu_bug_div && hw_smt_enabled ) ++ warning_add( ++ "Booted on leaky-DIV hardware with SMT/Hyperthreading\n" ++ "enabled. Please assess your configuration and choose an\n" ++ "explicit 'smt=' setting. See XSA-439.\n"); ++} ++ + static void __init ibpb_calculations(void) + { + bool def_ibpb_entry = false; +@@ -1726,6 +1770,8 @@ void __init init_speculation_mitigations(void) + + ibpb_calculations(); + ++ div_calculations(hw_smt_enabled); ++ + /* Check whether Eager FPU should be enabled by default.
*/ + if ( opt_eager_fpu == -1 ) + opt_eager_fpu = should_use_eager_fpu(); +-- +2.41.0 +