From 81b1db8227e9e4377fab84deb03ed078c2b7f943 Mon Sep 17 00:00:00 2001 From: Michael Young Date: Mar 20 2024 19:12:09 +0000 Subject: update to xen-4.18.1 --- diff --git a/.gitignore b/.gitignore index 7c63a9b..97a51c2 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ lwip-1.3.0.tar.gz pciutils-2.2.9.tar.bz2 zlib-1.2.3.tar.gz polarssl-1.1.4-gpl.tgz -/xen-4.18.0.tar.gz +/xen-4.18.1.tar.gz diff --git a/sources b/sources index 1aaa8a6..8d4dabc 100644 --- a/sources +++ b/sources @@ -4,4 +4,4 @@ SHA512 (newlib-1.16.0.tar.gz) = 40eb96bbc6736a16b6399e0cdb73e853d0d90b685c967e77 SHA512 (zlib-1.2.3.tar.gz) = 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e SHA512 (polarssl-1.1.4-gpl.tgz) = 88da614e4d3f4409c4fd3bb3e44c7587ba051e3fed4e33d526069a67e8180212e1ea22da984656f50e290049f60ddca65383e5983c0f8884f648d71f698303ad SHA512 (pciutils-2.2.9.tar.bz2) = 2b3d98d027e46d8c08037366dde6f0781ca03c610ef2b380984639e4ef39899ed8d8b8e4cd9c9dc54df101279b95879bd66bfd4d04ad07fef41e847ea7ae32b5 -SHA512 (xen-4.18.0.tar.gz) = 4cc9fd155144045a173c5f8ecc45f149817f1034eec618cb6f8b0494ef2fb5b95c4c60cf0bf4bec4bef8a622c35b6a3cb7dedc38e6d95e726f1611c73ddb3273 +SHA512 (xen-4.18.1.tar.gz) = 6d67c177a31dae6979c111498f65cff2a7a792299cc72e658ae9f926468c14092807de94b6e269849fafdb6ed5a9d076400ecde0c4fec3b2b4bdb5f5ef39fcbb diff --git a/xen.gcc12.fixes.patch b/xen.gcc12.fixes.patch index 66d13b1..b35440f 100644 --- a/xen.gcc12.fixes.patch +++ b/xen.gcc12.fixes.patch @@ -1,7 +1,7 @@ --- xen-4.16.0/Config.mk.orig 2021-11-30 11:42:42.000000000 +0000 +++ xen-4.16.0/Config.mk 2022-01-24 20:25:16.687125822 +0000 @@ -186,6 +186,7 @@ - $(call cc-option-add,CFLAGS,CC,-Wdeclaration-after-statement) + $(call cc-option-add,CFLAGS,CC,-Wno-unused-but-set-variable) $(call cc-option-add,CFLAGS,CC,-Wno-unused-local-typedefs) +$(call cc-option-add,CFLAGS,CC,-Wno-error=array-bounds) diff --git a/xen.git-576528a2a742069af203e90c613c5c93e23c9755.patch b/xen.git-576528a2a742069af203e90c613c5c93e23c9755.patch deleted file mode 100644 index 20acd78..0000000 --- a/xen.git-576528a2a742069af203e90c613c5c93e23c9755.patch +++ /dev/null @@ -1,47 +0,0 @@ -From 576528a2a742069af203e90c613c5c93e23c9755 Mon Sep 17 00:00:00 2001 -From: =?utf8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 27 Feb 2024 14:58:40 +0100 -Subject: [PATCH] x86/spec: do not print thunk option selection if not built-in -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -Since the thunk built-in enable is printed as part of the "Compiled-in -support:" line, avoid printing anything in "Xen settings:" if the thunk is -disabled at build time. - -Note the BTI-Thunk option printing is also adjusted to print a colon in the -same way the other options on the line do. - -Requested-by: Jan Beulich -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich ---- - xen/arch/x86/spec_ctrl.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 74d2136d42..752225faa6 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -504,11 +504,12 @@ static void __init print_details(enum ind_thunk thunk) - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -- thunk == THUNK_NONE ? "N/A" : -- thunk == THUNK_RETPOLINE ? "RETPOLINE" : -- thunk == THUNK_LFENCE ? 
"LFENCE" : -- thunk == THUNK_JMP ? "JMP" : "?", -+ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -+ thunk != THUNK_NONE ? "BTI-Thunk: " : "", -+ thunk == THUNK_NONE ? "" : -+ thunk == THUNK_RETPOLINE ? "RETPOLINE, " : -+ thunk == THUNK_LFENCE ? "LFENCE, " : -+ thunk == THUNK_JMP ? "JMP, " : "?, ", - (!boot_cpu_has(X86_FEATURE_IBRSB) && - !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" : - (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? "IBRS+" : "IBRS-", --- -2.30.2 - diff --git a/xen.git-60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3.patch b/xen.git-60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3.patch deleted file mode 100644 index a1b9f49..0000000 --- a/xen.git-60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3.patch +++ /dev/null @@ -1,55 +0,0 @@ -From 60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3 Mon Sep 17 00:00:00 2001 -From: =?utf8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Mon, 26 Feb 2024 16:06:42 +0100 -Subject: [PATCH] x86/spec: fix BRANCH_HARDEN option to only be set when - build-enabled -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -The current logic to handle the BRANCH_HARDEN option will report it as enabled -even when build-time disabled. Fix this by only allowing the option to be set -when support for it is built into Xen. - -Fixes: 2d6f36daa086 ('x86/nospec: Introduce CONFIG_SPECULATIVE_HARDEN_BRANCH') -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich ---- - xen/arch/x86/spec_ctrl.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 421fe3f640..503f1c7a37 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -50,7 +50,8 @@ static int8_t __initdata opt_psfd = -1; - int8_t __ro_after_init opt_ibpb_ctxt_switch = -1; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; --static bool __initdata opt_branch_harden = true; -+static bool __initdata opt_branch_harden = -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); - - bool __initdata bsp_delay_spec_ctrl; - uint8_t __read_mostly default_xen_spec_ctrl; -@@ -268,7 +269,16 @@ static int __init cf_check parse_spec_ctrl(const char *s) - else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 ) - opt_l1d_flush = val; - else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 ) -- opt_branch_harden = val; -+ { -+ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) ) -+ opt_branch_harden = val; -+ else -+ { -+ no_config_param("SPECULATIVE_HARDEN_BRANCH", "spec-ctrl", s, -+ ss); -+ rc = -EINVAL; -+ } -+ } - else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) - opt_srb_lock = val; - else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) --- -2.30.2 - diff --git a/xen.git-6e9507f7d51fe49df8bc70f83e49ce06c92e4e54.patch b/xen.git-6e9507f7d51fe49df8bc70f83e49ce06c92e4e54.patch deleted file mode 100644 index a78c792..0000000 --- a/xen.git-6e9507f7d51fe49df8bc70f83e49ce06c92e4e54.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 6e9507f7d51fe49df8bc70f83e49ce06c92e4e54 Mon Sep 17 00:00:00 2001 -From: =?utf8?q?Roger=20Pau=20Monn=C3=A9?= -Date: Tue, 27 Feb 2024 14:57:52 +0100 -Subject: [PATCH] x86/spec: print the built-in SPECULATIVE_HARDEN_* options -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -Just like it's done for INDIRECT_THUNK and SHADOW_PAGING. 
- -Reported-by: Jan Beulich -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich ---- - xen/arch/x86/spec_ctrl.c | 14 +++++++++++++- - 1 file changed, 13 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 503f1c7a37..2d17cbe25f 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -476,13 +476,25 @@ static void __init print_details(enum ind_thunk thunk) - (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : ""); - - /* Compiled-in support which pertains to mitigations. */ -- if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) -+ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) ) - printk(" Compiled-in support:" - #ifdef CONFIG_INDIRECT_THUNK - " INDIRECT_THUNK" - #endif - #ifdef CONFIG_SHADOW_PAGING - " SHADOW_PAGING" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_ARRAY -+ " HARDEN_ARRAY" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH -+ " HARDEN_BRANCH" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS -+ " HARDEN_GUEST_ACCESS" - #endif - "\n"); - --- -2.30.2 - diff --git a/xen.git-de17162cafd27f2865a3102a2ec0f386a02ed03d.patch b/xen.git-de17162cafd27f2865a3102a2ec0f386a02ed03d.patch deleted file mode 100644 index 6043d48..0000000 --- a/xen.git-de17162cafd27f2865a3102a2ec0f386a02ed03d.patch +++ /dev/null @@ -1,99 +0,0 @@ -From de17162cafd27f2865a3102a2ec0f386a02ed03d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Thu, 29 Feb 2024 11:26:40 +0000 -Subject: [PATCH] x86/cpu-policy: Allow for levelling of VERW side effects -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -MD_CLEAR and FB_CLEAR need OR-ing across a migrate pool. Allow this, by -having them unconditinally set in max, with the host values reflected in -default. Annotate the bits as having special properies. - -Signed-off-by: Andrew Cooper -Reviewed-by: Roger Pau Monné ---- - xen/arch/x86/cpu-policy.c | 24 +++++++++++++++++++++ - xen/arch/x86/include/asm/cpufeature.h | 1 + - xen/include/public/arch-x86/cpufeatureset.h | 4 ++-- - 3 files changed, 27 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 609db6946f..2c6f03057b 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -442,6 +442,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) - __set_bit(X86_FEATURE_RSBA, fs); - __set_bit(X86_FEATURE_RRSBA, fs); - -+ /* -+ * These bits indicate that the VERW instruction may have gained -+ * scrubbing side effects. With pooling, they mean "you might migrate -+ * somewhere where scrubbing is necessary", and may need exposing on -+ * unaffected hardware. This is fine, because the VERW instruction -+ * has been around since the 286. -+ */ -+ __set_bit(X86_FEATURE_MD_CLEAR, fs); -+ __set_bit(X86_FEATURE_FB_CLEAR, fs); -+ - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an - * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. -@@ -486,6 +496,20 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) - cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) - __clear_bit(X86_FEATURE_RDRAND, fs); - -+ /* -+ * These bits indicate that the VERW instruction may have gained -+ * scrubbing side effects. 
The max policy has them set for migration -+ * reasons, so reset the default policy back to the host values in -+ * case we're unaffected. -+ */ -+ __clear_bit(X86_FEATURE_MD_CLEAR, fs); -+ if ( cpu_has_md_clear ) -+ __set_bit(X86_FEATURE_MD_CLEAR, fs); -+ -+ __clear_bit(X86_FEATURE_FB_CLEAR, fs); -+ if ( cpu_has_fb_clear ) -+ __set_bit(X86_FEATURE_FB_CLEAR, fs); -+ - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an - * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index be16492c68..ad24d0fa88 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -178,6 +178,7 @@ static inline bool boot_cpu_has(unsigned int feat) - #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS) - #define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT) - #define cpu_has_srbds_ctrl boot_cpu_has(X86_FEATURE_SRBDS_CTRL) -+#define cpu_has_md_clear boot_cpu_has(X86_FEATURE_MD_CLEAR) - #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) - #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) - #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index b230d3a690..0374cec3a2 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -262,7 +262,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single - XEN_CPUFEATURE(FSRM, 9*32+ 4) /*A Fast Short REP MOVS */ - XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a VP2INTERSECT{D,Q} insns */ - XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */ --XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */ -+XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffers */ - XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */ - XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ - XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ -@@ -334,7 +334,7 @@ XEN_CPUFEATURE(DOITM, 16*32+12) /* Data Operand Invariant Timing - XEN_CPUFEATURE(SBDR_SSDP_NO, 16*32+13) /*A No Shared Buffer Data Read or Sideband Stale Data Propagation */ - XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Propagation */ - XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */ --XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */ -+XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*!A Fill Buffers cleared by VERW */ - XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */ - XEN_CPUFEATURE(RRSBA, 16*32+19) /*! 
Restricted RSB Alternative */ - XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */ --- -2.30.2 - diff --git a/xen.spec b/xen.spec index 1236568..06a5573 100644 --- a/xen.spec +++ b/xen.spec @@ -54,8 +54,8 @@ Summary: Xen is a virtual machine monitor Name: xen -Version: 4.18.0 -Release: 7%{?dist} +Version: 4.18.1 +Release: 1%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -112,30 +112,8 @@ Patch46: xen.efi.build.patch Patch47: xen.gcc13.fixes.patch Patch49: xen.python3.12.patch Patch50: xen.ocaml5.fixes.patch -Patch51: xsa447.patch Patch52: xen.gcc14.fixes.patch Patch53: newlib.gcc14.fixes.patch -Patch54: xsa449.patch -Patch55: xsa450.patch -Patch56: xsa451-4.18.patch -Patch57: xen.git-de17162cafd27f2865a3102a2ec0f386a02ed03d.patch -Patch58: xsa452-4.18-1.patch -Patch59: xsa452-4.18-2.patch -Patch60: xsa452-4.18-3.patch -Patch61: xsa452-4.18-4.patch -Patch62: xsa452-4.18-5.patch -Patch63: xsa452-4.18-6.patch -Patch64: xsa452-4.18-7.patch -Patch65: xen.git-60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3.patch -Patch66: xen.git-6e9507f7d51fe49df8bc70f83e49ce06c92e4e54.patch -Patch67: xen.git-576528a2a742069af203e90c613c5c93e23c9755.patch -Patch68: xsa453-4.18-1.patch -Patch69: xsa453-4.18-2.patch -Patch70: xsa453-4.18-3.patch -Patch71: xsa453-4.18-4.patch -Patch72: xsa453-4.18-5.patch -Patch73: xsa453-4.18-6.patch -Patch74: xsa453-4.18-7.patch %if %build_qemutrad @@ -348,30 +326,8 @@ manage Xen virtual machines. %if "%dist" != ".fc38" %patch 50 -p1 %endif -%patch 51 -p1 %patch 52 -p1 %patch 53 -p1 -%patch 54 -p1 -%patch 55 -p1 -%patch 56 -p1 -%patch 57 -p1 -%patch 58 -p1 -%patch 59 -p1 -%patch 60 -p1 -%patch 61 -p1 -%patch 62 -p1 -%patch 63 -p1 -%patch 64 -p1 -%patch 65 -p1 -%patch 66 -p1 -%patch 67 -p1 -%patch 68 -p1 -%patch 69 -p1 -%patch 70 -p1 -%patch 71 -p1 -%patch 72 -p1 -%patch 73 -p1 -%patch 74 -p1 # qemu-xen-traditional patches pushd tools/qemu-xen-traditional @@ -978,6 +934,11 @@ fi %endif %changelog +* Wed Mar 20 2024 Michael Young - 4.18.1-1 +- update to xen-4.18.1 + rebase xen.gcc12.fixes.patch + remove patches now included or superceded upstream + * Wed Mar 13 2024 Michael Young - 4.18.0-7 - x86: Register File Data Sampling [XSA-452, CVE-2023-28746] - GhostRace: Speculative Race Conditions [XSA-453, CVE-2024-2193] diff --git a/xsa447.patch b/xsa447.patch deleted file mode 100644 index 2e26396..0000000 --- a/xsa447.patch +++ /dev/null @@ -1,117 +0,0 @@ -From 084c7312fa6c1d4a7fa343efa1d7d73693dafff4 Mon Sep 17 00:00:00 2001 -From: Michal Orzel -Date: Thu, 23 Nov 2023 15:53:02 +0100 -Subject: [PATCH] xen/arm: page: Avoid pointer overflow on cache clean & - invalidate - -On Arm32, after cleaning and invalidating the last dcache line of the top -domheap page i.e. VA = 0xfffff000 (as a result of flushing the page to -RAM), we end up adding the value of a dcache line size to the pointer -once again, which results in a pointer arithmetic overflow (with 64B line -size, operation 0xffffffc0 + 0x40 overflows to 0x0). Such behavior is -undefined and given the wide range of compiler versions we support, it is -difficult to determine what could happen in such scenario. - -Modify clean_and_invalidate_dcache_va_range() as well as -clean_dcache_va_range() and invalidate_dcache_va_range() due to similarity -of handling to prevent pointer arithmetic overflow. Modify the loops to -use an additional variable to store the index of the next cacheline. 
-Add an assert to prevent passing a region that wraps around which is -illegal and would end up in a page fault anyway (region 0-2MB is -unmapped). Lastly, return early if size passed is 0. - -Note that on Arm64, we don't have this problem given that the max VA -space we support is 48-bits. - -This is XSA-447 / CVE-2023-46837. - -Signed-off-by: Michal Orzel -Reviewed-by: Julien Grall ---- - xen/arch/arm/include/asm/page.h | 35 ++++++++++++++++++++++++++------- - 1 file changed, 28 insertions(+), 7 deletions(-) - -diff --git a/xen/arch/arm/include/asm/page.h b/xen/arch/arm/include/asm/page.h -index ebaf5964f114..69f817d1e68a 100644 ---- a/xen/arch/arm/include/asm/page.h -+++ b/xen/arch/arm/include/asm/page.h -@@ -162,6 +162,13 @@ static inline size_t read_dcache_line_bytes(void) - static inline int invalidate_dcache_va_range(const void *p, unsigned long size) - { - size_t cacheline_mask = dcache_line_bytes - 1; -+ unsigned long idx = 0; -+ -+ if ( !size ) -+ return 0; -+ -+ /* Passing a region that wraps around is illegal */ -+ ASSERT(((uintptr_t)p + size - 1) >= (uintptr_t)p); - - dsb(sy); /* So the CPU issues all writes to the range */ - -@@ -174,11 +181,11 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size) - } - - for ( ; size >= dcache_line_bytes; -- p += dcache_line_bytes, size -= dcache_line_bytes ) -- asm volatile (__invalidate_dcache_one(0) : : "r" (p)); -+ idx += dcache_line_bytes, size -= dcache_line_bytes ) -+ asm volatile (__invalidate_dcache_one(0) : : "r" (p + idx)); - - if ( size > 0 ) -- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p)); -+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p + idx)); - - dsb(sy); /* So we know the flushes happen before continuing */ - -@@ -188,14 +195,21 @@ static inline int invalidate_dcache_va_range(const void *p, unsigned long size) - static inline int clean_dcache_va_range(const void *p, unsigned long size) - { - size_t cacheline_mask = dcache_line_bytes - 1; -+ unsigned long idx = 0; -+ -+ if ( !size ) -+ return 0; -+ -+ /* Passing a region that wraps around is illegal */ -+ ASSERT(((uintptr_t)p + size - 1) >= (uintptr_t)p); - - dsb(sy); /* So the CPU issues all writes to the range */ - size += (uintptr_t)p & cacheline_mask; - size = (size + cacheline_mask) & ~cacheline_mask; - p = (void *)((uintptr_t)p & ~cacheline_mask); - for ( ; size >= dcache_line_bytes; -- p += dcache_line_bytes, size -= dcache_line_bytes ) -- asm volatile (__clean_dcache_one(0) : : "r" (p)); -+ idx += dcache_line_bytes, size -= dcache_line_bytes ) -+ asm volatile (__clean_dcache_one(0) : : "r" (p + idx)); - dsb(sy); /* So we know the flushes happen before continuing */ - /* ARM callers assume that dcache_* functions cannot fail. 
*/ - return 0; -@@ -205,14 +219,21 @@ static inline int clean_and_invalidate_dcache_va_range - (const void *p, unsigned long size) - { - size_t cacheline_mask = dcache_line_bytes - 1; -+ unsigned long idx = 0; -+ -+ if ( !size ) -+ return 0; -+ -+ /* Passing a region that wraps around is illegal */ -+ ASSERT(((uintptr_t)p + size - 1) >= (uintptr_t)p); - - dsb(sy); /* So the CPU issues all writes to the range */ - size += (uintptr_t)p & cacheline_mask; - size = (size + cacheline_mask) & ~cacheline_mask; - p = (void *)((uintptr_t)p & ~cacheline_mask); - for ( ; size >= dcache_line_bytes; -- p += dcache_line_bytes, size -= dcache_line_bytes ) -- asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p)); -+ idx += dcache_line_bytes, size -= dcache_line_bytes ) -+ asm volatile (__clean_and_invalidate_dcache_one(0) : : "r" (p + idx)); - dsb(sy); /* So we know the flushes happen before continuing */ - /* ARM callers assume that dcache_* functions cannot fail. */ - return 0; --- -2.40.1 - diff --git a/xsa449.patch b/xsa449.patch deleted file mode 100644 index 80aeac2..0000000 --- a/xsa449.patch +++ /dev/null @@ -1,89 +0,0 @@ -From d8b92b21b224126860978e4c604302f3c1e3bf75 Mon Sep 17 00:00:00 2001 -From: Roger Pau Monne -Date: Wed, 13 Dec 2023 15:51:59 +0100 -Subject: [PATCH] pci: fail device assignment if phantom functions cannot be - assigned -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current behavior is that no error is reported if (some) phantom functions -fail to be assigned during device add or assignment, so the operation succeeds -even if some phantom functions are not correctly setup. - -This can lead to devices possibly being successfully assigned to a domU while -some of the device phantom functions are still assigned to dom0. Even when the -device is assigned domIO before being assigned to a domU phantom functions -might fail to be assigned to domIO, and also fail to be assigned to the domU, -leaving them assigned to dom0. - -Since the device can generate requests using the IDs of those phantom -functions, given the scenario above a device in such state would be in control -of a domU, but still capable of generating transactions that use a context ID -targeting dom0 owned memory. - -Modify device assign in order to attempt to deassign the device if phantom -functions failed to be assigned. - -Note that device addition is not modified in the same way, as in that case the -device is assigned to a trusted domain, and hence partial assign can lead to -device malfunction but not a security issue. 
- -This is XSA-449 / CVE-2023-46839 - -Fixes: 4e9950dc1bd2 ('IOMMU: add phantom function support') -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich ---- - xen/drivers/passthrough/pci.c | 27 +++++++++++++++++++++------ - 1 file changed, 21 insertions(+), 6 deletions(-) - -diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c -index 1439d1ef2b26..47c0eee7bdcc 100644 ---- a/xen/drivers/passthrough/pci.c -+++ b/xen/drivers/passthrough/pci.c -@@ -1488,11 +1488,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) - - pdev->fault.count = 0; - -- if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn, -- pci_to_dev(pdev), flag)) ) -- goto done; -+ rc = iommu_call(hd->platform_ops, assign_device, d, devfn, pci_to_dev(pdev), -+ flag); - -- for ( ; pdev->phantom_stride; rc = 0 ) -+ while ( pdev->phantom_stride && !rc ) - { - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) -@@ -1503,8 +1502,24 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) - - done: - if ( rc ) -- printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n", -- d, &PCI_SBDF(seg, bus, devfn), rc); -+ { -+ printk(XENLOG_G_WARNING "%pd: assign %s(%pp) failed (%d)\n", -+ d, devfn != pdev->devfn ? "phantom function " : "", -+ &PCI_SBDF(seg, bus, devfn), rc); -+ -+ if ( devfn != pdev->devfn && deassign_device(d, seg, bus, pdev->devfn) ) -+ { -+ /* -+ * Device with phantom functions that failed to both assign and -+ * rollback. Mark the device as broken and crash the target domain, -+ * as the state of the functions at this point is unknown and Xen -+ * has no way to assert consistent context assignment among them. -+ */ -+ pdev->broken = true; -+ if ( !is_hardware_domain(d) && d != dom_io ) -+ domain_crash(d); -+ } -+ } - /* The device is assigned to dom_io so mark it as quarantined */ - else if ( d == dom_io ) - pdev->quarantine = true; --- -2.43.0 - diff --git a/xsa450.patch b/xsa450.patch deleted file mode 100644 index e94933b..0000000 --- a/xsa450.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Andrew Cooper -Subject: VT-d: Fix "else" vs "#endif" misplacement - -In domain_pgd_maddr() the "#endif" is misplaced with respect to "else". This -generates incorrect logic when CONFIG_HVM is compiled out, as the "else" body -is executed unconditionally. - -Rework the logic to use IS_ENABLED() instead of explicit #ifdef-ary, as it's -clearer to follow. This in turn involves adjusting p2m_get_pagetable() to -compile when CONFIG_HVM is disabled. - -This is XSA-450 / CVE-2023-46840. - -Reported-by: Reported-by: Teddy Astie -Fixes: 033ff90aa9c1 ("x86/P2M: p2m_{alloc,free}_ptp() and p2m_alloc_table() are HVM-only") -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h -index 32f3f394b05a..6ada585eaac2 100644 ---- a/xen/arch/x86/include/asm/p2m.h -+++ b/xen/arch/x86/include/asm/p2m.h -@@ -435,7 +435,14 @@ static inline bool p2m_is_altp2m(const struct p2m_domain *p2m) - return p2m->p2m_class == p2m_alternate; - } - --#define p2m_get_pagetable(p2m) ((p2m)->phys_table) -+#ifdef CONFIG_HVM -+static inline pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m) -+{ -+ return p2m->phys_table; -+} -+#else -+pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m); -+#endif - - /* - * Ensure any deferred p2m TLB flush has been completed on all VCPUs. 
-diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index 99b642f12ef9..4244855032ee 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -438,15 +438,13 @@ static paddr_t domain_pgd_maddr(struct domain *d, paddr_t pgd_maddr, - - if ( pgd_maddr ) - /* nothing */; --#ifdef CONFIG_HVM -- else if ( iommu_use_hap_pt(d) ) -+ else if ( IS_ENABLED(CONFIG_HVM) && iommu_use_hap_pt(d) ) - { - pagetable_t pgt = p2m_get_pagetable(p2m_get_hostp2m(d)); - - pgd_maddr = pagetable_get_paddr(pgt); - } - else --#endif - { - if ( !hd->arch.vtd.pgd_maddr ) - { diff --git a/xsa451-4.18.patch b/xsa451-4.18.patch deleted file mode 100644 index 721f3f3..0000000 --- a/xsa451-4.18.patch +++ /dev/null @@ -1,188 +0,0 @@ -From: Jan Beulich -Subject: x86: account for shadow stack in exception-from-stub recovery - -Dealing with exceptions raised from within emulation stubs involves -discarding return address (replaced by exception related information). -Such discarding of course also requires removing the corresponding entry -from the shadow stack. - -Also amend the comment in fixup_exception_return(), to further clarify -why use of ptr[1] can't be an out-of-bounds access. - -While touching do_invalid_op() also add a missing fall-through -annotation. - -This is CVE-2023-46841 / XSA-451. - -Fixes: 209fb9919b50 ("x86/extable: Adjust extable handling to be shadow stack compatible") -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/extable.c -+++ b/xen/arch/x86/extable.c -@@ -86,26 +86,29 @@ search_one_extable(const struct exceptio - } - - unsigned long --search_exception_table(const struct cpu_user_regs *regs) -+search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra) - { - const struct virtual_region *region = find_text_region(regs->rip); - unsigned long stub = this_cpu(stubs.addr); - - if ( region && region->ex ) -+ { -+ *stub_ra = 0; - return search_one_extable(region->ex, region->ex_end, regs->rip); -+ } - - if ( regs->rip >= stub + STUB_BUF_SIZE / 2 && - regs->rip < stub + STUB_BUF_SIZE && - regs->rsp > (unsigned long)regs && - regs->rsp < (unsigned long)get_cpu_info() ) - { -- unsigned long retptr = *(unsigned long *)regs->rsp; -+ unsigned long retaddr = *(unsigned long *)regs->rsp, fixup; - -- region = find_text_region(retptr); -- retptr = region && region->ex -- ? search_one_extable(region->ex, region->ex_end, retptr) -- : 0; -- if ( retptr ) -+ region = find_text_region(retaddr); -+ fixup = region && region->ex -+ ? 
search_one_extable(region->ex, region->ex_end, retaddr) -+ : 0; -+ if ( fixup ) - { - /* - * Put trap number and error code on the stack (in place of the -@@ -117,7 +120,8 @@ search_exception_table(const struct cpu_ - }; - - *(unsigned long *)regs->rsp = token.raw; -- return retptr; -+ *stub_ra = retaddr; -+ return fixup; - } - } - ---- a/xen/arch/x86/include/asm/uaccess.h -+++ b/xen/arch/x86/include/asm/uaccess.h -@@ -421,7 +421,8 @@ union stub_exception_token { - unsigned long raw; - }; - --extern unsigned long search_exception_table(const struct cpu_user_regs *regs); -+extern unsigned long search_exception_table(const struct cpu_user_regs *regs, -+ unsigned long *stub_ra); - extern void sort_exception_tables(void); - extern void sort_exception_table(struct exception_table_entry *start, - const struct exception_table_entry *stop); ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -845,7 +845,7 @@ void do_unhandled_trap(struct cpu_user_r - } - - static void fixup_exception_return(struct cpu_user_regs *regs, -- unsigned long fixup) -+ unsigned long fixup, unsigned long stub_ra) - { - if ( IS_ENABLED(CONFIG_XEN_SHSTK) ) - { -@@ -862,7 +862,8 @@ static void fixup_exception_return(struc - /* - * Search for %rip. The shstk currently looks like this: - * -- * ... [Likely pointed to by SSP] -+ * tok [Supervisor token, == &tok | BUSY, only with FRED inactive] -+ * ... [Pointed to by SSP for most exceptions, empty in IST cases] - * %cs [== regs->cs] - * %rip [== regs->rip] - * SSP [Likely points to 3 slots higher, above %cs] -@@ -880,7 +881,56 @@ static void fixup_exception_return(struc - */ - if ( ptr[0] == regs->rip && ptr[1] == regs->cs ) - { -+ unsigned long primary_shstk = -+ (ssp & ~(STACK_SIZE - 1)) + -+ (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8; -+ - wrss(fixup, ptr); -+ -+ if ( !stub_ra ) -+ goto shstk_done; -+ -+ /* -+ * Stub recovery ought to happen only when the outer context -+ * was on the main shadow stack. We need to also "pop" the -+ * stub's return address from the interrupted context's shadow -+ * stack. That is, -+ * - if we're still on the main stack, we need to move the -+ * entire stack (up to and including the exception frame) -+ * up by one slot, incrementing the original SSP in the -+ * exception frame, -+ * - if we're on an IST stack, we need to increment the -+ * original SSP. -+ */ -+ BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT); -+ -+ if ( (ssp ^ primary_shstk) >> PAGE_SHIFT ) -+ { -+ /* -+ * We're on an IST stack. First make sure the two return -+ * addresses actually match. Then increment the interrupted -+ * context's SSP. -+ */ -+ BUG_ON(stub_ra != *(unsigned long*)ptr[-1]); -+ wrss(ptr[-1] + 8, &ptr[-1]); -+ goto shstk_done; -+ } -+ -+ /* Make sure the two return addresses actually match. */ -+ BUG_ON(stub_ra != ptr[2]); -+ -+ /* Move exception frame, updating SSP there. */ -+ wrss(ptr[1], &ptr[2]); /* %cs */ -+ wrss(ptr[0], &ptr[1]); /* %rip */ -+ wrss(ptr[-1] + 8, &ptr[0]); /* SSP */ -+ -+ /* Move all newer entries. */ -+ while ( --ptr != _p(ssp) ) -+ wrss(ptr[-1], &ptr[0]); -+ -+ /* Finally account for our own stack having shifted up. 
*/ -+ asm volatile ( "incsspd %0" :: "r" (2) ); -+ - goto shstk_done; - } - } -@@ -901,7 +951,8 @@ static void fixup_exception_return(struc - - static bool extable_fixup(struct cpu_user_regs *regs, bool print) - { -- unsigned long fixup = search_exception_table(regs); -+ unsigned long stub_ra = 0; -+ unsigned long fixup = search_exception_table(regs, &stub_ra); - - if ( unlikely(fixup == 0) ) - return false; -@@ -915,7 +966,7 @@ static bool extable_fixup(struct cpu_use - vector_name(regs->entry_vector), regs->error_code, - _p(regs->rip), _p(regs->rip), _p(fixup)); - -- fixup_exception_return(regs, fixup); -+ fixup_exception_return(regs, fixup, stub_ra); - this_cpu(last_extable_addr) = regs->rip; - - return true; -@@ -1183,7 +1234,8 @@ void do_invalid_op(struct cpu_user_regs - { - case BUGFRAME_run_fn: - case BUGFRAME_warn: -- fixup_exception_return(regs, (unsigned long)eip); -+ fixup_exception_return(regs, (unsigned long)eip, 0); -+ fallthrough; - case BUGFRAME_bug: - case BUGFRAME_assert: - return; diff --git a/xsa452-4.18-1.patch b/xsa452-4.18-1.patch deleted file mode 100644 index cae848f..0000000 --- a/xsa452-4.18-1.patch +++ /dev/null @@ -1,304 +0,0 @@ -From: Andrew Cooper -Subject: x86/entry: Introduce EFRAME_* constants - -restore_all_guest() does a lot of manipulation of the stack after popping the -GPRs, and uses raw %rsp displacements to do so. Also, almost all entrypaths -use raw %rsp displacements prior to pushing GPRs. - -Provide better mnemonics, to aid readability and reduce the chance of errors -when editing. - -No functional change. The resulting binary is identical. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 37541208f119a9c552c6c6c3246ea61be0d44035) - -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 57b73a4e6214..2fc4d9130a4d 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -51,6 +51,23 @@ void __dummy__(void) - OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es); - BLANK(); - -+ /* -+ * EFRAME_* is for the entry/exit logic where %rsp is pointing at -+ * UREGS_error_code and GPRs are still/already guest values. -+ */ -+#define OFFSET_EF(sym, mem) \ -+ DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \ -+ offsetof(struct cpu_user_regs, error_code)) -+ -+ OFFSET_EF(EFRAME_entry_vector, entry_vector); -+ OFFSET_EF(EFRAME_rip, rip); -+ OFFSET_EF(EFRAME_cs, cs); -+ OFFSET_EF(EFRAME_eflags, eflags); -+ OFFSET_EF(EFRAME_rsp, rsp); -+ BLANK(); -+ -+#undef OFFSET_EF -+ - OFFSET(VCPU_processor, struct vcpu, processor); - OFFSET(VCPU_domain, struct vcpu, domain); - OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info_area.map); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index fcc3a721f147..cb473f08eebd 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -15,7 +15,7 @@ ENTRY(entry_int82) - ENDBR64 - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - pushq $0 -- movl $HYPERCALL_VECTOR, 4(%rsp) -+ movl $HYPERCALL_VECTOR, EFRAME_entry_vector(%rsp) - SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. 
*/ - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 9a7b129aa7e4..968da9d727b1 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -190,15 +190,15 @@ restore_all_guest: - SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ - - RESTORE_ALL -- testw $TRAP_syscall,4(%rsp) -+ testw $TRAP_syscall, EFRAME_entry_vector(%rsp) - jz iret_exit_to_guest - -- movq 24(%rsp),%r11 # RFLAGS -+ mov EFRAME_eflags(%rsp), %r11 - andq $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11 - orq $X86_EFLAGS_IF,%r11 - - /* Don't use SYSRET path if the return address is not canonical. */ -- movq 8(%rsp),%rcx -+ mov EFRAME_rip(%rsp), %rcx - sarq $47,%rcx - incl %ecx - cmpl $1,%ecx -@@ -213,20 +213,20 @@ restore_all_guest: - ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK - #endif - -- movq 8(%rsp), %rcx # RIP -- cmpw $FLAT_USER_CS32,16(%rsp)# CS -- movq 32(%rsp),%rsp # RSP -+ mov EFRAME_rip(%rsp), %rcx -+ cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp) -+ mov EFRAME_rsp(%rsp), %rsp - je 1f - sysretq - 1: sysretl - - ALIGN - .Lrestore_rcx_iret_exit_to_guest: -- movq 8(%rsp), %rcx # RIP -+ mov EFRAME_rip(%rsp), %rcx - /* No special register assumptions. */ - iret_exit_to_guest: -- andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), 24(%rsp) -- orl $X86_EFLAGS_IF,24(%rsp) -+ andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp) -+ orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp) - addq $8,%rsp - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) -@@ -257,7 +257,7 @@ ENTRY(lstar_enter) - pushq $FLAT_KERNEL_CS64 - pushq %rcx - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -294,7 +294,7 @@ ENTRY(cstar_enter) - pushq $FLAT_USER_CS32 - pushq %rcx - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -335,7 +335,7 @@ GLOBAL(sysenter_eflags_saved) - pushq $3 /* ring 3 null cs */ - pushq $0 /* null rip */ - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -389,7 +389,7 @@ ENTRY(int80_direct_trap) - ENDBR64 - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - pushq $0 -- movl $0x80, 4(%rsp) -+ movl $0x80, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -649,7 +649,7 @@ ret_from_intr: - .section .init.text, "ax", @progbits - ENTRY(early_page_fault) - ENDBR64 -- movl $X86_EXC_PF, 4(%rsp) -+ movl $X86_EXC_PF, EFRAME_entry_vector(%rsp) - SAVE_ALL - movq %rsp, %rdi - call do_early_page_fault -@@ -716,7 +716,7 @@ ENTRY(common_interrupt) - - ENTRY(entry_PF) - ENDBR64 -- movl $X86_EXC_PF, 4(%rsp) -+ movl $X86_EXC_PF, EFRAME_entry_vector(%rsp) - /* No special register assumptions. 
*/ - GLOBAL(handle_exception) - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP -@@ -890,90 +890,90 @@ FATAL_exception_with_ints_disabled: - ENTRY(entry_DE) - ENDBR64 - pushq $0 -- movl $X86_EXC_DE, 4(%rsp) -+ movl $X86_EXC_DE, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_MF) - ENDBR64 - pushq $0 -- movl $X86_EXC_MF, 4(%rsp) -+ movl $X86_EXC_MF, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_XM) - ENDBR64 - pushq $0 -- movl $X86_EXC_XM, 4(%rsp) -+ movl $X86_EXC_XM, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_NM) - ENDBR64 - pushq $0 -- movl $X86_EXC_NM, 4(%rsp) -+ movl $X86_EXC_NM, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_DB) - ENDBR64 - pushq $0 -- movl $X86_EXC_DB, 4(%rsp) -+ movl $X86_EXC_DB, EFRAME_entry_vector(%rsp) - jmp handle_ist_exception - - ENTRY(entry_BP) - ENDBR64 - pushq $0 -- movl $X86_EXC_BP, 4(%rsp) -+ movl $X86_EXC_BP, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_OF) - ENDBR64 - pushq $0 -- movl $X86_EXC_OF, 4(%rsp) -+ movl $X86_EXC_OF, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_BR) - ENDBR64 - pushq $0 -- movl $X86_EXC_BR, 4(%rsp) -+ movl $X86_EXC_BR, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_UD) - ENDBR64 - pushq $0 -- movl $X86_EXC_UD, 4(%rsp) -+ movl $X86_EXC_UD, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_TS) - ENDBR64 -- movl $X86_EXC_TS, 4(%rsp) -+ movl $X86_EXC_TS, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_NP) - ENDBR64 -- movl $X86_EXC_NP, 4(%rsp) -+ movl $X86_EXC_NP, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_SS) - ENDBR64 -- movl $X86_EXC_SS, 4(%rsp) -+ movl $X86_EXC_SS, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_GP) - ENDBR64 -- movl $X86_EXC_GP, 4(%rsp) -+ movl $X86_EXC_GP, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_AC) - ENDBR64 -- movl $X86_EXC_AC, 4(%rsp) -+ movl $X86_EXC_AC, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_CP) - ENDBR64 -- movl $X86_EXC_CP, 4(%rsp) -+ movl $X86_EXC_CP, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_DF) - ENDBR64 -- movl $X86_EXC_DF, 4(%rsp) -+ movl $X86_EXC_DF, EFRAME_entry_vector(%rsp) - /* Set AC to reduce chance of further SMAP faults */ - ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP - SAVE_ALL -@@ -998,7 +998,7 @@ ENTRY(entry_DF) - ENTRY(entry_NMI) - ENDBR64 - pushq $0 -- movl $X86_EXC_NMI, 4(%rsp) -+ movl $X86_EXC_NMI, EFRAME_entry_vector(%rsp) - handle_ist_exception: - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - SAVE_ALL -@@ -1130,7 +1130,7 @@ handle_ist_exception: - ENTRY(entry_MC) - ENDBR64 - pushq $0 -- movl $X86_EXC_MC, 4(%rsp) -+ movl $X86_EXC_MC, EFRAME_entry_vector(%rsp) - jmp handle_ist_exception - - /* No op trap handler. Required for kexec crash path. */ -@@ -1167,7 +1167,7 @@ autogen_stubs: /* Automatically generated stubs. */ - 1: - ENDBR64 - pushq $0 -- movb $vec,4(%rsp) -+ movb $vec, EFRAME_entry_vector(%rsp) - jmp common_interrupt - - entrypoint 1b -@@ -1181,7 +1181,7 @@ autogen_stubs: /* Automatically generated stubs. */ - test $8,%spl /* 64bit exception frames are 16 byte aligned, but the word */ - jz 2f /* size is 8 bytes. Check whether the processor gave us an */ - pushq $0 /* error code, and insert an empty one if not. 
*/ --2: movb $vec,4(%rsp) -+2: movb $vec, EFRAME_entry_vector(%rsp) - jmp handle_exception - - entrypoint 1b diff --git a/xsa452-4.18-2.patch b/xsa452-4.18-2.patch deleted file mode 100644 index 4535397..0000000 --- a/xsa452-4.18-2.patch +++ /dev/null @@ -1,90 +0,0 @@ -From: Andrew Cooper -Subject: x86: Resync intel-family.h from Linux - -From v6.8-rc6 - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 195e75371b13c4f7ecdf7b5c50aed0d02f2d7ce8) - -diff --git a/xen/arch/x86/include/asm/intel-family.h b/xen/arch/x86/include/asm/intel-family.h -index ffc49151befe..b65e9c46b922 100644 ---- a/xen/arch/x86/include/asm/intel-family.h -+++ b/xen/arch/x86/include/asm/intel-family.h -@@ -26,6 +26,9 @@ - * _G - parts with extra graphics on - * _X - regular server parts - * _D - micro server parts -+ * _N,_P - other mobile parts -+ * _H - premium mobile parts -+ * _S - other client parts - * - * Historical OPTDIFFs: - * -@@ -37,6 +40,9 @@ - * their own names :-( - */ - -+/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ -+#define INTEL_FAM6_ANY X86_MODEL_ANY -+ - #define INTEL_FAM6_CORE_YONAH 0x0E - - #define INTEL_FAM6_CORE2_MEROM 0x0F -@@ -93,8 +99,6 @@ - #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ - #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ - --#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ -- - #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ - - #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ -@@ -102,12 +106,31 @@ - - #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ - -+#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF -+ -+#define INTEL_FAM6_GRANITERAPIDS_X 0xAD -+#define INTEL_FAM6_GRANITERAPIDS_D 0xAE -+ -+/* "Hybrid" Processors (P-Core/E-Core) */ -+ -+#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ -+ - #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ - #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ - --#define INTEL_FAM6_RAPTORLAKE 0xB7 -+#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ -+#define INTEL_FAM6_RAPTORLAKE_P 0xBA -+#define INTEL_FAM6_RAPTORLAKE_S 0xBF -+ -+#define INTEL_FAM6_METEORLAKE 0xAC -+#define INTEL_FAM6_METEORLAKE_L 0xAA -+ -+#define INTEL_FAM6_ARROWLAKE_H 0xC5 -+#define INTEL_FAM6_ARROWLAKE 0xC6 -+ -+#define INTEL_FAM6_LUNARLAKE_M 0xBD - --/* "Small Core" Processors (Atom) */ -+/* "Small Core" Processors (Atom/E-Core) */ - - #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ - #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ -@@ -134,6 +157,13 @@ - #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ - #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ - -+#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ -+ -+#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ -+#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ -+ -+#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ -+ - /* Xeon Phi */ - - #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ diff --git a/xsa452-4.18-3.patch b/xsa452-4.18-3.patch deleted file mode 100644 index bc9059c..0000000 --- a/xsa452-4.18-3.patch +++ /dev/null @@ -1,135 +0,0 @@ -From: Andrew Cooper -Subject: x86/vmx: Perform VERW flushing later in the VMExit path - -Broken out of the following patch because this change is subtle enough on its -own. See it for the rational of why we're moving VERW. 
- -As for how, extend the trick already used to hold one condition in -flags (RESUME vs LAUNCH) through the POPing of GPRs. - -Move the MOV CR earlier. Intel specify flags to be undefined across it. - -Encode the two conditions we want using SF and PF. See the code comment for -exactly how. - -Leave a comment to explain the lack of any content around -SPEC_CTRL_EXIT_TO_VMX, but leave the block in place. Sods law says if we -delete it, we'll need to reintroduce it. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 475fa20b7384464210f42bad7195f87bd6f1c63f) - -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index e3f60d5a82f7..1bead826caa3 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -87,17 +87,39 @@ UNLIKELY_END(realmode) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ - /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */ -- DO_SPEC_CTRL_COND_VERW -+ /* -+ * All speculation safety work happens to be elsewhere. VERW is after -+ * popping the GPRs, while restoring the guest MSR_SPEC_CTRL is left -+ * to the MSR load list. -+ */ - - mov VCPU_hvm_guest_cr2(%rbx),%rax -+ mov %rax, %cr2 -+ -+ /* -+ * We need to perform two conditional actions (VERW, and Resume vs -+ * Launch) after popping GPRs. With some cunning, we can encode both -+ * of these in eflags together. -+ * -+ * Parity is only calculated over the bottom byte of the answer, while -+ * Sign is simply the top bit. -+ * -+ * Therefore, the final OR instruction ends up producing: -+ * SF = VCPU_vmx_launched -+ * PF = !SCF_verw -+ */ -+ BUILD_BUG_ON(SCF_verw & ~0xff) -+ movzbl VCPU_vmx_launched(%rbx), %ecx -+ shl $31, %ecx -+ movzbl CPUINFO_spec_ctrl_flags(%rsp), %eax -+ and $SCF_verw, %eax -+ or %eax, %ecx - - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp -- mov %rax,%cr2 -- cmpb $0,VCPU_vmx_launched(%rbx) - pop %rbx - pop %r11 - pop %r10 -@@ -108,7 +130,13 @@ UNLIKELY_END(realmode) - pop %rdx - pop %rsi - pop %rdi -- je .Lvmx_launch -+ -+ jpe .L_skip_verw -+ /* VERW clobbers ZF, but preserves all others, including SF. */ -+ verw STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)(%rsp) -+.L_skip_verw: -+ -+ jns .Lvmx_launch - - /*.Lvmx_resume:*/ - VMRESUME -diff --git a/xen/arch/x86/include/asm/asm_defns.h b/xen/arch/x86/include/asm/asm_defns.h -index baaaccb26e17..56ae26e54265 100644 ---- a/xen/arch/x86/include/asm/asm_defns.h -+++ b/xen/arch/x86/include/asm/asm_defns.h -@@ -81,6 +81,14 @@ register unsigned long current_stack_pointer asm("rsp"); - - #ifdef __ASSEMBLY__ - -+.macro BUILD_BUG_ON condstr, cond:vararg -+ .if \cond -+ .error "Condition \"\condstr\" not satisfied" -+ .endif -+.endm -+/* preprocessor macro to make error message more user friendly */ -+#define BUILD_BUG_ON(cond) BUILD_BUG_ON #cond, cond -+ - #ifdef HAVE_AS_QUOTED_SYM - #define SUBSECTION_LBL(tag) \ - .ifndef .L.tag; \ -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 6cb7c1b9491e..525745a06608 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -152,6 +152,13 @@ - #endif - .endm - -+/* -+ * Helper to improve the readibility of stack dispacements with %rsp in -+ * unusual positions. Both @field and @top_of_stack should be constants from -+ * the same object. @top_of_stack should be where %rsp is currently pointing. 
-+ */ -+#define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) -+ - .macro DO_SPEC_CTRL_COND_VERW - /* - * Requires %rsp=cpuinfo -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 2fc4d9130a4d..0d336788989f 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -135,6 +135,7 @@ void __dummy__(void) - #endif - - OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); -+ OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code); - OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); - OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset); diff --git a/xsa452-4.18-4.patch b/xsa452-4.18-4.patch deleted file mode 100644 index 0ccff77..0000000 --- a/xsa452-4.18-4.patch +++ /dev/null @@ -1,197 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Perform VERW flushing later in exit paths - -On parts vulnerable to RFDS, VERW's side effects are extended to scrub all -non-architectural entries in various Physical Register Files. To remove all -of Xen's values, the VERW must be after popping the GPRs. - -Rework SPEC_CTRL_COND_VERW to default to an CPUINFO_error_code %rsp position, -but with overrides for other contexts. Identify that it clobbers eflags; this -is particularly relevant for the SYSRET path. - -For the IST exit return to Xen, have the main SPEC_CTRL_EXIT_TO_XEN put a -shadow copy of spec_ctrl_flags, as GPRs can't be used at the point we want to -issue the VERW. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit 0a666cf2cd99df6faf3eebc81a1fc286e4eca4c7) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index 525745a06608..13acebc75dff 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -159,16 +159,23 @@ - */ - #define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) - --.macro DO_SPEC_CTRL_COND_VERW -+.macro SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \ -+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code) - /* -- * Requires %rsp=cpuinfo -+ * Requires \scf and \sel as %rsp-relative expressions -+ * Clobbers eflags -+ * -+ * VERW needs to run after guest GPRs have been restored, where only %rsp is -+ * good to use. Default to expecting %rsp pointing at CPUINFO_error_code. -+ * Contexts where this is not true must provide an alternative \scf and \sel. - * - * Issue a VERW for its flushing side effect, if indicated. This is a Spectre - * v1 gadget, but the IRET/VMEntry is serialising. 
- */ -- testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp) -+ testb $SCF_verw, \scf(%rsp) - jz .L\@_verw_skip -- verw CPUINFO_verw_sel(%rsp) -+ verw \sel(%rsp) - .L\@_verw_skip: - .endm - -@@ -286,8 +293,6 @@ - */ - ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV - -- DO_SPEC_CTRL_COND_VERW -- - ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - .endm - -@@ -367,7 +372,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - .macro SPEC_CTRL_EXIT_TO_XEN - /* -- * Requires %r12=ist_exit, %r14=stack_end -+ * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs - * Clobbers %rax, %rbx, %rcx, %rdx - */ - movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx -@@ -395,11 +400,18 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - test %r12, %r12 - jz .L\@_skip_ist_exit - -- /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */ -- testb $SCF_verw, %bl -- jz .L\@_skip_verw -- verw STACK_CPUINFO_FIELD(verw_sel)(%r14) --.L\@_skip_verw: -+ /* -+ * Stash SCF and verw_sel above eflags in the case of an IST_exit. The -+ * VERW logic needs to run after guest GPRs have been restored; i.e. where -+ * we cannot use %r12 or %r14 for the purposes they have here. -+ * -+ * When the CPU pushed this exception frame, it zero-extended eflags. -+ * Therefore it is safe for the VERW logic to look at the stashed SCF -+ * outside of the ist_exit condition. Also, this stashing won't influence -+ * any other restore_all_guest() paths. -+ */ -+ or $(__HYPERVISOR_DS32 << 16), %ebx -+ mov %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */ - - ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 0d336788989f..85c7d0c98967 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -55,14 +55,22 @@ void __dummy__(void) - * EFRAME_* is for the entry/exit logic where %rsp is pointing at - * UREGS_error_code and GPRs are still/already guest values. - */ --#define OFFSET_EF(sym, mem) \ -+#define OFFSET_EF(sym, mem, ...) \ - DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \ -- offsetof(struct cpu_user_regs, error_code)) -+ offsetof(struct cpu_user_regs, error_code) __VA_ARGS__) - - OFFSET_EF(EFRAME_entry_vector, entry_vector); - OFFSET_EF(EFRAME_rip, rip); - OFFSET_EF(EFRAME_cs, cs); - OFFSET_EF(EFRAME_eflags, eflags); -+ -+ /* -+ * These aren't real fields. They're spare space, used by the IST -+ * exit-to-xen path. -+ */ -+ OFFSET_EF(EFRAME_shadow_scf, eflags, +4); -+ OFFSET_EF(EFRAME_shadow_sel, eflags, +6); -+ - OFFSET_EF(EFRAME_rsp, rsp); - BLANK(); - -@@ -136,6 +144,7 @@ void __dummy__(void) - - OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); - OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code); -+ OFFSET(CPUINFO_rip, struct cpu_info, guest_cpu_user_regs.rip); - OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); - OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index cb473f08eebd..3bbe3a79a5b7 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -161,6 +161,12 @@ ENTRY(compat_restore_all_guest) - SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ - - RESTORE_ALL adj=8 compat=1 -+ -+ /* Account for ev/ec having already been popped off the stack. 
*/ -+ SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \ -+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip) -+ - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 968da9d727b1..2c7512130f49 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -214,6 +214,9 @@ restore_all_guest: - #endif - - mov EFRAME_rip(%rsp), %rcx -+ -+ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ -+ - cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp) - mov EFRAME_rsp(%rsp), %rsp - je 1f -@@ -227,6 +230,9 @@ restore_all_guest: - iret_exit_to_guest: - andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp) - orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp) -+ -+ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ -+ - addq $8,%rsp - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) -@@ -679,9 +685,22 @@ UNLIKELY_START(ne, exit_cr3) - UNLIKELY_END(exit_cr3) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end %rsp=regs, Clob: abcd */ - - RESTORE_ALL adj=8 -+ -+ /* -+ * When the CPU pushed this exception frame, it zero-extended eflags. -+ * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of -+ * spec_ctrl_flags and ver_sel above eflags, as we can't use any GPRs, -+ * and we're at a random place on the stack, not in a CPUFINFO block. -+ * -+ * Account for ev/ec having already been popped off the stack. -+ */ -+ SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(EFRAME_shadow_scf, EFRAME_rip), \ -+ sel=STK_REL(EFRAME_shadow_sel, EFRAME_rip) -+ - iretq - - ENTRY(common_interrupt) diff --git a/xsa452-4.18-5.patch b/xsa452-4.18-5.patch deleted file mode 100644 index d55e454..0000000 --- a/xsa452-4.18-5.patch +++ /dev/null @@ -1,239 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Rename VERW related options - -VERW is going to be used for a 3rd purpose, and the existing nomenclature -didn't survive the Stale MMIO issues terribly well. - -Rename the command line option from `md-clear=` to `verw=`. This is more -consistent with other options which tend to be named based on what they're -doing, not which feature enumeration they use behind the scenes. Retain -`md-clear=` as a deprecated alias. - -Rename opt_md_clear_{pv,hvm} and opt_fb_clear_mmio to opt_verw_{pv,hvm,mmio}, -which has a side effect of making spec_ctrl_init_domain() rather clearer to -follow. - -No functional change. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit f7603ca252e4226739eb3129a5290ee3da3f8ea4) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 582d6741d182..fbf16839249a 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2370,7 +2370,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - - ### spec-ctrl (x86) - > `= List of [ , xen=, {pv,hvm}=, --> {msr-sc,rsb,md-clear,ibpb-entry}=|{pv,hvm}=, -+> {msr-sc,rsb,verw,ibpb-entry}=|{pv,hvm}=, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, - > unpriv-mmio,gds-mit,div-scrub}= ]` -@@ -2395,7 +2395,7 @@ in place for guests to use. - - Use of a positive boolean value for either of these options is invalid. 
- --The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options -+The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=` and `ibpb-entry=` options - offer fine grained control over the primitives by Xen. These impact Xen's - ability to protect itself, and/or Xen's ability to virtualise support for - guests to use. -@@ -2412,11 +2412,12 @@ guests to use. - guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. - * `rsb=` offers control over whether to overwrite the Return Stack Buffer / - Return Address Stack on entry to Xen and on idle. --* `md-clear=` offers control over whether to use VERW to flush -- microarchitectural buffers on idle and exit from Xen. *Note: For -- compatibility with development versions of this fix, `mds=` is also accepted -- on Xen 4.12 and earlier as an alias. Consult vendor documentation in -- preference to here.* -+* `verw=` offers control over whether to use VERW for its scrubbing side -+ effects at appropriate privilege transitions. The exact side effects are -+ microarchitecture and microcode specific. *Note: `md-clear=` is accepted as -+ a deprecated alias. For compatibility with development versions of XSA-297, -+ `mds=` is also accepted on Xen 4.12 and earlier as an alias. Consult vendor -+ documentation in preference to here.* - * `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction - Barrier) is used on entry to Xen. This is used by default on hardware - vulnerable to Branch Type Confusion, and hardware vulnerable to Speculative -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index a965b6db28ba..c42d8cdc22d6 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -25,8 +25,8 @@ static bool __initdata opt_msr_sc_pv = true; - static bool __initdata opt_msr_sc_hvm = true; - static int8_t __initdata opt_rsb_pv = -1; - static bool __initdata opt_rsb_hvm = true; --static int8_t __ro_after_init opt_md_clear_pv = -1; --static int8_t __ro_after_init opt_md_clear_hvm = -1; -+static int8_t __ro_after_init opt_verw_pv = -1; -+static int8_t __ro_after_init opt_verw_hvm = -1; - - static int8_t __ro_after_init opt_ibpb_entry_pv = -1; - static int8_t __ro_after_init opt_ibpb_entry_hvm = -1; -@@ -66,7 +66,7 @@ static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. 
- - static int8_t __initdata opt_srb_lock = -1; - static bool __initdata opt_unpriv_mmio; --static bool __ro_after_init opt_fb_clear_mmio; -+static bool __ro_after_init opt_verw_mmio; - static int8_t __initdata opt_gds_mit = -1; - static int8_t __initdata opt_div_scrub = -1; - -@@ -108,8 +108,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) - disable_common: - opt_rsb_pv = false; - opt_rsb_hvm = false; -- opt_md_clear_pv = 0; -- opt_md_clear_hvm = 0; -+ opt_verw_pv = 0; -+ opt_verw_hvm = 0; - opt_ibpb_entry_pv = 0; - opt_ibpb_entry_hvm = 0; - opt_ibpb_entry_dom0 = false; -@@ -140,14 +140,14 @@ static int __init cf_check parse_spec_ctrl(const char *s) - { - opt_msr_sc_pv = val; - opt_rsb_pv = val; -- opt_md_clear_pv = val; -+ opt_verw_pv = val; - opt_ibpb_entry_pv = val; - } - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) - { - opt_msr_sc_hvm = val; - opt_rsb_hvm = val; -- opt_md_clear_hvm = val; -+ opt_verw_hvm = val; - opt_ibpb_entry_hvm = val; - } - else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) -@@ -192,21 +192,22 @@ static int __init cf_check parse_spec_ctrl(const char *s) - break; - } - } -- else if ( (val = parse_boolean("md-clear", s, ss)) != -1 ) -+ else if ( (val = parse_boolean("verw", s, ss)) != -1 || -+ (val = parse_boolean("md-clear", s, ss)) != -1 ) - { - switch ( val ) - { - case 0: - case 1: -- opt_md_clear_pv = opt_md_clear_hvm = val; -+ opt_verw_pv = opt_verw_hvm = val; - break; - - case -2: -- s += strlen("md-clear="); -+ s += (*s == 'v') ? strlen("verw=") : strlen("md-clear="); - if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -- opt_md_clear_pv = val; -+ opt_verw_pv = val; - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -- opt_md_clear_hvm = val; -+ opt_verw_hvm = val; - else - default: - rc = -EINVAL; -@@ -528,8 +529,8 @@ static void __init print_details(enum ind_thunk thunk) - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", - opt_ibpb_ctxt_switch ? " IBPB-ctxt" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", -- opt_md_clear_pv || opt_md_clear_hvm || -- opt_fb_clear_mmio ? " VERW" : "", -+ opt_verw_pv || opt_verw_hvm || -+ opt_verw_mmio ? " VERW" : "", - opt_div_scrub ? " DIV" : "", - opt_branch_harden ? " BRANCH_HARDEN" : ""); - -@@ -550,13 +551,13 @@ static void __init print_details(enum ind_thunk thunk) - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || - amd_virt_spec_ctrl || -- opt_eager_fpu || opt_md_clear_hvm) ? "" : " None", -+ opt_eager_fpu || opt_verw_hvm) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", - (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || - amd_virt_spec_ctrl) ? " MSR_VIRT_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- opt_md_clear_hvm ? " MD_CLEAR" : "", -+ opt_verw_hvm ? " VERW" : "", - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : ""); - - #endif -@@ -565,11 +566,11 @@ static void __init print_details(enum ind_thunk thunk) - (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || - boot_cpu_has(X86_FEATURE_SC_RSB_PV) || - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || -- opt_eager_fpu || opt_md_clear_pv) ? "" : " None", -+ opt_eager_fpu || opt_verw_pv) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- opt_md_clear_pv ? " MD_CLEAR" : "", -+ opt_verw_pv ? " VERW" : "", - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? 
" IBPB-entry" : ""); - - printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", -@@ -1502,8 +1503,8 @@ void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); - -- bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) || -- (opt_fb_clear_mmio && is_iommu_enabled(d))); -+ bool verw = ((pv ? opt_verw_pv : opt_verw_hvm) || -+ (opt_verw_mmio && is_iommu_enabled(d))); - - bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) && - (d->domain_id != 0 || opt_ibpb_entry_dom0)); -@@ -1866,19 +1867,20 @@ void __init init_speculation_mitigations(void) - * the return-to-guest path. - */ - if ( opt_unpriv_mmio ) -- opt_fb_clear_mmio = cpu_has_fb_clear; -+ opt_verw_mmio = cpu_has_fb_clear; - - /* - * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. - * This will only be a token effort for MLPDS/MFBDS when HT is enabled, - * but it is somewhat better than nothing. - */ -- if ( opt_md_clear_pv == -1 ) -- opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- boot_cpu_has(X86_FEATURE_MD_CLEAR)); -- if ( opt_md_clear_hvm == -1 ) -- opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- boot_cpu_has(X86_FEATURE_MD_CLEAR)); -+ if ( opt_verw_pv == -1 ) -+ opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -+ cpu_has_md_clear); -+ -+ if ( opt_verw_hvm == -1 ) -+ opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -+ cpu_has_md_clear); - - /* - * Enable MDS/MMIO defences as applicable. The Idle blocks need using if -@@ -1891,12 +1893,12 @@ void __init init_speculation_mitigations(void) - * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) - * - * After calculating the appropriate idle setting, simplify -- * opt_md_clear_hvm to mean just "should we VERW on the way into HVM -+ * opt_verw_hvm to mean just "should we VERW on the way into HVM - * guests", so spec_ctrl_init_domain() can calculate suitable settings. - */ -- if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio ) -+ if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); -- opt_md_clear_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; -+ opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; - - /* - * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT diff --git a/xsa452-4.18-6.patch b/xsa452-4.18-6.patch deleted file mode 100644 index 422dc62..0000000 --- a/xsa452-4.18-6.patch +++ /dev/null @@ -1,163 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: VERW-handling adjustments - -... before we add yet more complexity to this logic. Mostly expanded -comments, but with three minor changes. - -1) Introduce cpu_has_useful_md_clear to simplify later logic in this patch and - future ones. - -2) We only ever need SC_VERW_IDLE when SMT is active. If SMT isn't active, - then there's no re-partition of pipeline resources based on thread-idleness - to worry about. - -3) The logic to adjust HVM VERW based on L1D_FLUSH is unmaintainable and, as - it turns out, wrong. SKIP_L1DFL is just a hint bit, whereas opt_l1d_flush - is the relevant decision of whether to use L1D_FLUSH based on - susceptibility and user preference. - - Rewrite the logic so it can be followed, and incorporate the fact that when - FB_CLEAR is visible, L1D_FLUSH isn't a safe substitution. - -This is part of XSA-452 / CVE-2023-28746. 
- -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit 1eb91a8a06230b4b64228c9a380194f8cfe6c5e2) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index c42d8cdc22d6..a4afcd8570e2 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1519,7 +1519,7 @@ void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; - bool has_spec_ctrl, ibrs = false, hw_smt_enabled; -- bool cpu_has_bug_taa, retpoline_safe; -+ bool cpu_has_bug_taa, cpu_has_useful_md_clear, retpoline_safe; - - hw_smt_enabled = check_smt_enabled(); - -@@ -1855,50 +1855,97 @@ void __init init_speculation_mitigations(void) - "enabled. Please assess your configuration and choose an\n" - "explicit 'smt=' setting. See XSA-273.\n"); - -+ /* -+ * A brief summary of VERW-related changes. -+ * -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html -+ * -+ * Relevant ucodes: -+ * -+ * - May 2019, for MDS. Introduces the MD_CLEAR CPUID bit and VERW side -+ * effects to scrub Store/Load/Fill buffers as applicable. MD_CLEAR -+ * exists architecturally, even when the side effects have been removed. -+ * -+ * Use VERW to scrub on return-to-guest. Parts with L1D_FLUSH to -+ * mitigate L1TF have the same side effect, so no need to do both. -+ * -+ * Various Atoms suffer from Store-buffer sampling only. Store buffers -+ * are statically partitioned between non-idle threads, so scrubbing is -+ * wanted when going idle too. -+ * -+ * Load ports and Fill buffers are competitively shared between threads. -+ * SMT must be disabled for VERW scrubbing to be fully effective. -+ * -+ * - November 2019, for TAA. Extended VERW side effects to TSX-enabled -+ * MDS_NO parts. -+ * -+ * - February 2022, for Client TSX de-feature. Removed VERW side effects -+ * from Client CPUs only. -+ * -+ * - May 2022, for MMIO Stale Data. (Re)introduced Fill Buffer scrubbing -+ * on all MMIO-affected parts which didn't already have it for MDS -+ * reasons, enumerating FB_CLEAR on those parts only. -+ * -+ * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing -+ * side effects as VERW and cannot be used in its place. -+ */ - mds_calculations(); - - /* -- * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have -- * reintroduced the VERW fill buffer flushing side effect because of a -- * susceptibility to FBSDP. -+ * Parts which enumerate FB_CLEAR are those with now-updated microcode -+ * which weren't susceptible to the original MFBDS (and therefore didn't -+ * have Fill Buffer scrubbing side effects to begin with, or were Client -+ * MDS_NO non-TAA_NO parts where the scrubbing was removed), but have had -+ * the scrubbing reintroduced because of a susceptibility to FBSDP. - * - * If unprivileged guests have (or will have) MMIO mappings, we can - * mitigate cross-domain leakage of fill buffer data by issuing VERW on -- * the return-to-guest path. -+ * the return-to-guest path. This is only a token effort if SMT is -+ * active. - */ - if ( opt_unpriv_mmio ) - opt_verw_mmio = cpu_has_fb_clear; - - /* -- * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. 
-- * This will only be a token effort for MLPDS/MFBDS when HT is enabled, -- * but it is somewhat better than nothing. -+ * MD_CLEAR is enumerated architecturally forevermore, even after the -+ * scrubbing side effects have been removed. Create ourselves an version -+ * which expressed whether we think MD_CLEAR is having any useful side -+ * effect. -+ */ -+ cpu_has_useful_md_clear = (cpu_has_md_clear && -+ (cpu_has_bug_mds || cpu_has_bug_msbds_only)); -+ -+ /* -+ * By default, use VERW scrubbing on applicable hardware, if we think it's -+ * going to have an effect. This will only be a token effort for -+ * MLPDS/MFBDS when SMT is enabled. - */ - if ( opt_verw_pv == -1 ) -- opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- cpu_has_md_clear); -+ opt_verw_pv = cpu_has_useful_md_clear; - - if ( opt_verw_hvm == -1 ) -- opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- cpu_has_md_clear); -+ opt_verw_hvm = cpu_has_useful_md_clear; - - /* -- * Enable MDS/MMIO defences as applicable. The Idle blocks need using if -- * either the PV or HVM MDS defences are used, or if we may give MMIO -- * access to untrusted guests. -- * -- * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with -- * equivalent semantics to avoid needing to perform both flushes on the -- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for -- * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) -- * -- * After calculating the appropriate idle setting, simplify -- * opt_verw_hvm to mean just "should we VERW on the way into HVM -- * guests", so spec_ctrl_init_domain() can calculate suitable settings. -+ * If SMT is active, and we're protecting against MDS or MMIO stale data, -+ * we need to scrub before going idle as well as on return to guest. -+ * Various pipeline resources are repartitioned amongst non-idle threads. - */ -- if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio ) -+ if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) || -+ opt_verw_mmio) && hw_smt_enabled ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); -- opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; -+ -+ /* -+ * After calculating the appropriate idle setting, simplify opt_verw_hvm -+ * to mean just "should we VERW on the way into HVM guests", so -+ * spec_ctrl_init_domain() can calculate suitable settings. -+ * -+ * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the -+ * only *_CLEAR we can see. -+ */ -+ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear ) -+ opt_verw_hvm = false; - - /* - * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT diff --git a/xsa452-4.18-7.patch b/xsa452-4.18-7.patch deleted file mode 100644 index a252db0..0000000 --- a/xsa452-4.18-7.patch +++ /dev/null @@ -1,307 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Mitigation Register File Data Sampling - -RFDS affects Atom cores, also branded E-cores, between the Goldmont and -Gracemont microarchitectures. This includes Alder Lake and Raptor Lake hybrid -clien systems which have a mix of Gracemont and other types of cores. - -Two new bits have been defined; RFDS_CLEAR to indicate VERW has more side -effets, and RFDS_NO to incidate that the system is unaffected. Plenty of -unaffected CPUs won't be getting RFDS_NO retrofitted in microcode, so we -synthesise it. Alder Lake and Raptor Lake Xeon-E's are unaffected due to -their platform configuration, and we must use the Hybrid CPUID bit to -distinguish them from their non-Xeon counterparts. 
- -Like MD_CLEAR and FB_CLEAR, RFDS_CLEAR needs OR-ing across a resource pool, so -set it in the max policies and reflect the host setting in default. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -(cherry picked from commit fb5b6f6744713410c74cfc12b7176c108e3c9a31) - -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index 7370f1b56ef9..52e451a806c1 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -172,7 +172,7 @@ static const char *const str_7d0[32] = - [ 8] = "avx512-vp2intersect", [ 9] = "srbds-ctrl", - [10] = "md-clear", [11] = "rtm-always-abort", - /* 12 */ [13] = "tsx-force-abort", -- [14] = "serialize", -+ [14] = "serialize", [15] = "hybrid", - [16] = "tsxldtrk", - [18] = "pconfig", - [20] = "cet-ibt", -@@ -245,7 +245,8 @@ static const char *const str_m10Al[32] = - [20] = "bhi-no", [21] = "xapic-status", - /* 22 */ [23] = "ovrclk-status", - [24] = "pbrsb-no", [25] = "gds-ctrl", -- [26] = "gds-no", -+ [26] = "gds-no", [27] = "rfds-no", -+ [28] = "rfds-clear", - }; - - static const char *const str_m10Ah[32] = -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index c7c5e99b7b4c..12e621b97de6 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -451,6 +451,7 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) - */ - __set_bit(X86_FEATURE_MD_CLEAR, fs); - __set_bit(X86_FEATURE_FB_CLEAR, fs); -+ __set_bit(X86_FEATURE_RFDS_CLEAR, fs); - - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an -@@ -500,6 +501,10 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) - if ( cpu_has_fb_clear ) - __set_bit(X86_FEATURE_FB_CLEAR, fs); - -+ __clear_bit(X86_FEATURE_RFDS_CLEAR, fs); -+ if ( cpu_has_rfds_clear ) -+ __set_bit(X86_FEATURE_RFDS_CLEAR, fs); -+ - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an - * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index 76ef2aeb1de6..3c57f55de075 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -181,6 +181,7 @@ static inline bool boot_cpu_has(unsigned int feat) - #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) - #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) - #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) -+#define cpu_has_hybrid boot_cpu_has(X86_FEATURE_HYBRID) - #define cpu_has_avx512_fp16 boot_cpu_has(X86_FEATURE_AVX512_FP16) - #define cpu_has_arch_caps boot_cpu_has(X86_FEATURE_ARCH_CAPS) - -@@ -208,6 +209,8 @@ static inline bool boot_cpu_has(unsigned int feat) - #define cpu_has_rrsba boot_cpu_has(X86_FEATURE_RRSBA) - #define cpu_has_gds_ctrl boot_cpu_has(X86_FEATURE_GDS_CTRL) - #define cpu_has_gds_no boot_cpu_has(X86_FEATURE_GDS_NO) -+#define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO) -+#define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR) - - /* Synthesized. 
*/ - #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) -diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h -index 82a81bd0a232..85ef28a612e0 100644 ---- a/xen/arch/x86/include/asm/msr-index.h -+++ b/xen/arch/x86/include/asm/msr-index.h -@@ -89,6 +89,8 @@ - #define ARCH_CAPS_PBRSB_NO (_AC(1, ULL) << 24) - #define ARCH_CAPS_GDS_CTRL (_AC(1, ULL) << 25) - #define ARCH_CAPS_GDS_NO (_AC(1, ULL) << 26) -+#define ARCH_CAPS_RFDS_NO (_AC(1, ULL) << 27) -+#define ARCH_CAPS_RFDS_CLEAR (_AC(1, ULL) << 28) - - #define MSR_FLUSH_CMD 0x0000010b - #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index a4afcd8570e2..8165379fed94 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -12,6 +12,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -435,7 +436,7 @@ static void __init print_details(enum ind_thunk thunk) - * Hardware read-only information, stating immunity to certain issues, or - * suggestions of which mitigation to use. - */ -- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", - (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "", - (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", -@@ -451,6 +452,7 @@ static void __init print_details(enum ind_thunk thunk) - (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "", - (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "", - (caps & ARCH_CAPS_GDS_NO) ? " GDS_NO" : "", -+ (caps & ARCH_CAPS_RFDS_NO) ? " RFDS_NO" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "", -@@ -461,7 +463,7 @@ static void __init print_details(enum ind_thunk thunk) - (e21a & cpufeat_mask(X86_FEATURE_SRSO_NO)) ? " SRSO_NO" : ""); - - /* Hardware features which need driving to mitigate issues. */ -- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (e8b & cpufeat_mask(X86_FEATURE_IBPB)) || - (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS)) || -@@ -479,6 +481,7 @@ static void __init print_details(enum ind_thunk thunk) - (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "", - (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : "", - (caps & ARCH_CAPS_GDS_CTRL) ? " GDS_CTRL" : "", -+ (caps & ARCH_CAPS_RFDS_CLEAR) ? " RFDS_CLEAR" : "", - (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : ""); - - /* Compiled-in support which pertains to mitigations. */ -@@ -1347,6 +1350,83 @@ static __init void mds_calculations(void) - } - } - -+/* -+ * Register File Data Sampling affects Atom cores from the Goldmont to -+ * Gracemont microarchitectures. The March 2024 microcode adds RFDS_NO to -+ * some but not all unaffected parts, and RFDS_CLEAR to affected parts still -+ * in support. -+ * -+ * Alder Lake and Raptor Lake client CPUs have a mix of P cores -+ * (Golden/Raptor Cove, not vulnerable) and E cores (Gracemont, -+ * vulnerable), and both enumerate RFDS_CLEAR. -+ * -+ * Both exist in a Xeon SKU, which has the E cores (Gracemont) disabled by -+ * platform configuration, and enumerate RFDS_NO. -+ * -+ * With older parts, or with out-of-date microcode, synthesise RFDS_NO when -+ * safe to do so. 
-+ * -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html -+ */ -+static void __init rfds_calculations(void) -+{ -+ /* RFDS is only known to affect Intel Family 6 processors at this time. */ -+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || -+ boot_cpu_data.x86 != 6 ) -+ return; -+ -+ /* -+ * If RFDS_NO or RFDS_CLEAR are visible, we've either got suitable -+ * microcode, or an RFDS-aware hypervisor is levelling us in a pool. -+ */ -+ if ( cpu_has_rfds_no || cpu_has_rfds_clear ) -+ return; -+ -+ /* If we're virtualised, don't attempt to synthesise RFDS_NO. */ -+ if ( cpu_has_hypervisor ) -+ return; -+ -+ /* -+ * Not all CPUs are expected to get a microcode update enumerating one of -+ * RFDS_{NO,CLEAR}, or we might have out-of-date microcode. -+ */ -+ switch ( boot_cpu_data.x86_model ) -+ { -+ case INTEL_FAM6_ALDERLAKE: -+ case INTEL_FAM6_RAPTORLAKE: -+ /* -+ * Alder Lake and Raptor Lake might be a client SKU (with the -+ * Gracemont cores active, and therefore vulnerable) or might be a -+ * server SKU (with the Gracemont cores disabled, and therefore not -+ * vulnerable). -+ * -+ * See if the CPU identifies as hybrid to distinguish the two cases. -+ */ -+ if ( !cpu_has_hybrid ) -+ break; -+ fallthrough; -+ case INTEL_FAM6_ALDERLAKE_L: -+ case INTEL_FAM6_RAPTORLAKE_P: -+ case INTEL_FAM6_RAPTORLAKE_S: -+ -+ case INTEL_FAM6_ATOM_GOLDMONT: /* Apollo Lake */ -+ case INTEL_FAM6_ATOM_GOLDMONT_D: /* Denverton */ -+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: /* Gemini Lake */ -+ case INTEL_FAM6_ATOM_TREMONT_D: /* Snow Ridge / Parker Ridge */ -+ case INTEL_FAM6_ATOM_TREMONT: /* Elkhart Lake */ -+ case INTEL_FAM6_ATOM_TREMONT_L: /* Jasper Lake */ -+ case INTEL_FAM6_ATOM_GRACEMONT: /* Alder Lake N */ -+ return; -+ } -+ -+ /* -+ * We appear to be on an unaffected CPU which didn't enumerate RFDS_NO, -+ * perhaps because of it's age or because of out-of-date microcode. -+ * Synthesise it. -+ */ -+ setup_force_cpu_cap(X86_FEATURE_RFDS_NO); -+} -+ - static bool __init cpu_has_gds(void) - { - /* -@@ -1860,6 +1940,7 @@ void __init init_speculation_mitigations(void) - * - * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html - * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html - * - * Relevant ucodes: - * -@@ -1889,8 +1970,12 @@ void __init init_speculation_mitigations(void) - * - * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing - * side effects as VERW and cannot be used in its place. -+ * -+ * - March 2023, for RFDS. Enumerate RFDS_CLEAR to mean that VERW now -+ * scrubs non-architectural entries from certain register files. - */ - mds_calculations(); -+ rfds_calculations(); - - /* - * Parts which enumerate FB_CLEAR are those with now-updated microcode -@@ -1922,15 +2007,19 @@ void __init init_speculation_mitigations(void) - * MLPDS/MFBDS when SMT is enabled. 
- */ - if ( opt_verw_pv == -1 ) -- opt_verw_pv = cpu_has_useful_md_clear; -+ opt_verw_pv = cpu_has_useful_md_clear || cpu_has_rfds_clear; - - if ( opt_verw_hvm == -1 ) -- opt_verw_hvm = cpu_has_useful_md_clear; -+ opt_verw_hvm = cpu_has_useful_md_clear || cpu_has_rfds_clear; - - /* - * If SMT is active, and we're protecting against MDS or MMIO stale data, - * we need to scrub before going idle as well as on return to guest. - * Various pipeline resources are repartitioned amongst non-idle threads. -+ * -+ * We don't need to scrub on idle for RFDS. There are no affected cores -+ * which support SMT, despite there being affected cores in hybrid systems -+ * which have SMT elsewhere in the platform. - */ - if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) || - opt_verw_mmio) && hw_smt_enabled ) -@@ -1944,7 +2033,8 @@ void __init init_speculation_mitigations(void) - * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the - * only *_CLEAR we can see. - */ -- if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear ) -+ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear && -+ !cpu_has_rfds_clear ) - opt_verw_hvm = false; - - /* -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 337aaa9c770b..8e17ef670fff 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -266,6 +266,7 @@ XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffe - XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. */ - XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ - XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ -+XEN_CPUFEATURE(HYBRID, 9*32+15) /* Heterogeneous platform */ - XEN_CPUFEATURE(TSXLDTRK, 9*32+16) /*a TSX load tracking suspend/resume insns */ - XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */ - XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /*A AVX512 FP16 instructions */ -@@ -338,6 +339,8 @@ XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */ - XEN_CPUFEATURE(PBRSB_NO, 16*32+24) /*A No Post-Barrier RSB predictions */ - XEN_CPUFEATURE(GDS_CTRL, 16*32+25) /* MCU_OPT_CTRL.GDS_MIT_{DIS,LOCK} */ - XEN_CPUFEATURE(GDS_NO, 16*32+26) /*A No Gather Data Sampling */ -+XEN_CPUFEATURE(RFDS_NO, 16*32+27) /*A No Register File Data Sampling */ -+XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VERW */ - - /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ - diff --git a/xsa453-4.18-1.patch b/xsa453-4.18-1.patch deleted file mode 100644 index a3c0f05..0000000 --- a/xsa453-4.18-1.patch +++ /dev/null @@ -1,50 +0,0 @@ -From: Andrew Cooper -Subject: xen: Swap order of actions in the FREE*() macros - -Wherever possible, it is a good idea to NULL out the visible reference to an -object prior to freeing it. The FREE*() macros already collect together both -parts, making it easy to adjust. - -This has a marginal code generation improvement, as some of the calls to the -free() function can be tailcall optimised. - -No functional change. - -Signed-off-by: Andrew Cooper -Acked-by: Jan Beulich -(cherry picked from commit c4f427ec879e7c0df6d44d02561e8bee838a293e) - -diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h -index 8b9618609f77..8bc5f4249d1b 100644 ---- a/xen/include/xen/mm.h -+++ b/xen/include/xen/mm.h -@@ -91,8 +91,9 @@ bool scrub_free_pages(void); - - /* Free an allocation, and zero the pointer to it. 
*/ - #define FREE_XENHEAP_PAGES(p, o) do { \ -- free_xenheap_pages(p, o); \ -+ void *_ptr_ = (p); \ - (p) = NULL; \ -+ free_xenheap_pages(_ptr_, o); \ - } while ( false ) - #define FREE_XENHEAP_PAGE(p) FREE_XENHEAP_PAGES(p, 0) - -diff --git a/xen/include/xen/xmalloc.h b/xen/include/xen/xmalloc.h -index 16979a117c6a..d857298011c1 100644 ---- a/xen/include/xen/xmalloc.h -+++ b/xen/include/xen/xmalloc.h -@@ -66,9 +66,10 @@ - extern void xfree(void *); - - /* Free an allocation, and zero the pointer to it. */ --#define XFREE(p) do { \ -- xfree(p); \ -- (p) = NULL; \ -+#define XFREE(p) do { \ -+ void *_ptr_ = (p); \ -+ (p) = NULL; \ -+ xfree(_ptr_); \ - } while ( false ) - - /* Underlying functions */ - diff --git a/xsa453-4.18-2.patch b/xsa453-4.18-2.patch deleted file mode 100644 index 01a7e05..0000000 --- a/xsa453-4.18-2.patch +++ /dev/null @@ -1,314 +0,0 @@ -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Subject: x86/spinlock: introduce support for blocking speculation into - critical regions - -Introduce a new Kconfig option to block speculation into lock protected -critical regions. The Kconfig option is enabled by default, but the mitigation -won't be engaged unless it's explicitly enabled in the command line using -`spec-ctrl=lock-harden`. - -Convert the spinlock acquire macros into always-inline functions, and introduce -a speculation barrier after the lock has been taken. Note the speculation -barrier is not placed inside the implementation of the spin lock functions, as -to prevent speculation from falling through the call to the lock functions -resulting in the barrier also being skipped. - -trylock variants are protected using a construct akin to the existing -evaluate_nospec(). - -This patch only implements the speculation barrier for x86. - -Note spin locks are the only locking primitive taken care in this change, -further locking primitives will be adjusted by separate changes. - -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -(cherry picked from commit 7ef0084418e188d05f338c3e028fbbe8b6924afa) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index fbf16839249a..3f9f9167182f 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2373,7 +2373,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - > {msr-sc,rsb,verw,ibpb-entry}=|{pv,hvm}=, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, --> unpriv-mmio,gds-mit,div-scrub}= ]` -+> unpriv-mmio,gds-mit,div-scrub,lock-harden}= ]` - - Controls for speculative execution sidechannel mitigations. By default, Xen - will pick the most appropriate mitigations based on compiled in support, -@@ -2500,6 +2500,11 @@ On all hardware, the `div-scrub=` option can be used to force or prevent Xen - from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate - DIV-leakage on hardware believed to be vulnerable. - -+If Xen is compiled with `CONFIG_SPECULATIVE_HARDEN_LOCK`, the `lock-harden=` -+boolean can be used to force or prevent Xen from using speculation barriers to -+protect lock critical regions. This mitigation won't be engaged by default, -+and needs to be explicitly enabled on the command line. 
-+ - ### sync_console - > `= ` - -diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h -index c3aad21c3b43..7e8221fd85dd 100644 ---- a/xen/arch/x86/include/asm/cpufeatures.h -+++ b/xen/arch/x86/include/asm/cpufeatures.h -@@ -24,7 +24,7 @@ XEN_CPUFEATURE(APERFMPERF, X86_SYNTH( 8)) /* APERFMPERF */ - XEN_CPUFEATURE(MFENCE_RDTSC, X86_SYNTH( 9)) /* MFENCE synchronizes RDTSC */ - XEN_CPUFEATURE(XEN_SMEP, X86_SYNTH(10)) /* SMEP gets used by Xen itself */ - XEN_CPUFEATURE(XEN_SMAP, X86_SYNTH(11)) /* SMAP gets used by Xen itself */ --/* Bit 12 unused. */ -+XEN_CPUFEATURE(SC_NO_LOCK_HARDEN, X86_SYNTH(12)) /* (Disable) Lock critical region hardening */ - XEN_CPUFEATURE(IND_THUNK_LFENCE, X86_SYNTH(13)) /* Use IND_THUNK_LFENCE */ - XEN_CPUFEATURE(IND_THUNK_JMP, X86_SYNTH(14)) /* Use IND_THUNK_JMP */ - XEN_CPUFEATURE(SC_NO_BRANCH_HARDEN, X86_SYNTH(15)) /* (Disable) Conditional branch hardening */ -diff --git a/xen/arch/x86/include/asm/nospec.h b/xen/arch/x86/include/asm/nospec.h -index 7150e76b87fb..0725839e1982 100644 ---- a/xen/arch/x86/include/asm/nospec.h -+++ b/xen/arch/x86/include/asm/nospec.h -@@ -38,6 +38,32 @@ static always_inline void block_speculation(void) - barrier_nospec_true(); - } - -+static always_inline void arch_block_lock_speculation(void) -+{ -+ alternative("lfence", "", X86_FEATURE_SC_NO_LOCK_HARDEN); -+} -+ -+/* Allow to insert a read memory barrier into conditionals */ -+static always_inline bool barrier_lock_true(void) -+{ -+ alternative("lfence #nospec-true", "", X86_FEATURE_SC_NO_LOCK_HARDEN); -+ return true; -+} -+ -+static always_inline bool barrier_lock_false(void) -+{ -+ alternative("lfence #nospec-false", "", X86_FEATURE_SC_NO_LOCK_HARDEN); -+ return false; -+} -+ -+static always_inline bool arch_lock_evaluate_nospec(bool condition) -+{ -+ if ( condition ) -+ return barrier_lock_true(); -+ else -+ return barrier_lock_false(); -+} -+ - #endif /* _ASM_X86_NOSPEC_H */ - - /* -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 8165379fed94..5dfc4ed69ec5 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -53,6 +53,7 @@ int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); -+static bool __initdata opt_lock_harden; - - bool __initdata bsp_delay_spec_ctrl; - uint8_t __read_mostly default_xen_spec_ctrl; -@@ -121,6 +122,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_ssbd = false; - opt_l1d_flush = 0; - opt_branch_harden = false; -+ opt_lock_harden = false; - opt_srb_lock = 0; - opt_unpriv_mmio = false; - opt_gds_mit = 0; -@@ -286,6 +288,16 @@ static int __init cf_check parse_spec_ctrl(const char *s) - rc = -EINVAL; - } - } -+ else if ( (val = parse_boolean("lock-harden", s, ss)) >= 0 ) -+ { -+ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) ) -+ opt_lock_harden = val; -+ else -+ { -+ no_config_param("SPECULATIVE_HARDEN_LOCK", "spec-ctrl", s, ss); -+ rc = -EINVAL; -+ } -+ } - else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) - opt_srb_lock = val; - else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) -@@ -488,7 +500,8 @@ static void __init print_details(enum ind_thunk thunk) - if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) || - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) || - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) || -- IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) ) -+ 
IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) ) - printk(" Compiled-in support:" - #ifdef CONFIG_INDIRECT_THUNK - " INDIRECT_THUNK" -@@ -504,11 +517,14 @@ static void __init print_details(enum ind_thunk thunk) - #endif - #ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS - " HARDEN_GUEST_ACCESS" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK -+ " HARDEN_LOCK" - #endif - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -+ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s%s\n", - thunk != THUNK_NONE ? "BTI-Thunk: " : "", - thunk == THUNK_NONE ? "" : - thunk == THUNK_RETPOLINE ? "RETPOLINE, " : -@@ -535,7 +551,8 @@ static void __init print_details(enum ind_thunk thunk) - opt_verw_pv || opt_verw_hvm || - opt_verw_mmio ? " VERW" : "", - opt_div_scrub ? " DIV" : "", -- opt_branch_harden ? " BRANCH_HARDEN" : ""); -+ opt_branch_harden ? " BRANCH_HARDEN" : "", -+ opt_lock_harden ? " LOCK_HARDEN" : ""); - - /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ - if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu ) -@@ -1918,6 +1935,9 @@ void __init init_speculation_mitigations(void) - if ( !opt_branch_harden ) - setup_force_cpu_cap(X86_FEATURE_SC_NO_BRANCH_HARDEN); - -+ if ( !opt_lock_harden ) -+ setup_force_cpu_cap(X86_FEATURE_SC_NO_LOCK_HARDEN); -+ - /* - * We do not disable HT by default on affected hardware. - * -diff --git a/xen/common/Kconfig b/xen/common/Kconfig -index 4d6fe051641d..3361a6d89257 100644 ---- a/xen/common/Kconfig -+++ b/xen/common/Kconfig -@@ -188,6 +188,23 @@ config SPECULATIVE_HARDEN_GUEST_ACCESS - - If unsure, say Y. - -+config SPECULATIVE_HARDEN_LOCK -+ bool "Speculative lock context hardening" -+ default y -+ depends on X86 -+ help -+ Contemporary processors may use speculative execution as a -+ performance optimisation, but this can potentially be abused by an -+ attacker to leak data via speculative sidechannels. -+ -+ One source of data leakage is via speculative accesses to lock -+ critical regions. -+ -+ This option is disabled by default at run time, and needs to be -+ enabled on the command line. -+ -+ If unsure, say Y. 
-+ - endmenu - - config DIT_DEFAULT -diff --git a/xen/include/xen/nospec.h b/xen/include/xen/nospec.h -index 76255bc46efe..455284640396 100644 ---- a/xen/include/xen/nospec.h -+++ b/xen/include/xen/nospec.h -@@ -70,6 +70,21 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - #define array_access_nospec(array, index) \ - (array)[array_index_nospec(index, ARRAY_SIZE(array))] - -+static always_inline void block_lock_speculation(void) -+{ -+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK -+ arch_block_lock_speculation(); -+#endif -+} -+ -+static always_inline bool lock_evaluate_nospec(bool condition) -+{ -+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK -+ return arch_lock_evaluate_nospec(condition); -+#endif -+ return condition; -+} -+ - #endif /* XEN_NOSPEC_H */ - - /* -diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h -index e7a1c1aa8988..28fce5615e5c 100644 ---- a/xen/include/xen/spinlock.h -+++ b/xen/include/xen/spinlock.h -@@ -1,6 +1,7 @@ - #ifndef __SPINLOCK_H__ - #define __SPINLOCK_H__ - -+#include - #include - #include - -@@ -195,13 +196,30 @@ int _spin_trylock_recursive(spinlock_t *lock); - void _spin_lock_recursive(spinlock_t *lock); - void _spin_unlock_recursive(spinlock_t *lock); - --#define spin_lock(l) _spin_lock(l) --#define spin_lock_cb(l, c, d) _spin_lock_cb(l, c, d) --#define spin_lock_irq(l) _spin_lock_irq(l) -+static always_inline void spin_lock(spinlock_t *l) -+{ -+ _spin_lock(l); -+ block_lock_speculation(); -+} -+ -+static always_inline void spin_lock_cb(spinlock_t *l, void (*c)(void *data), -+ void *d) -+{ -+ _spin_lock_cb(l, c, d); -+ block_lock_speculation(); -+} -+ -+static always_inline void spin_lock_irq(spinlock_t *l) -+{ -+ _spin_lock_irq(l); -+ block_lock_speculation(); -+} -+ - #define spin_lock_irqsave(l, f) \ - ({ \ - BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \ - ((f) = _spin_lock_irqsave(l)); \ -+ block_lock_speculation(); \ - }) - - #define spin_unlock(l) _spin_unlock(l) -@@ -209,7 +227,7 @@ void _spin_unlock_recursive(spinlock_t *lock); - #define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f) - - #define spin_is_locked(l) _spin_is_locked(l) --#define spin_trylock(l) _spin_trylock(l) -+#define spin_trylock(l) lock_evaluate_nospec(_spin_trylock(l)) - - #define spin_trylock_irqsave(lock, flags) \ - ({ \ -@@ -230,8 +248,15 @@ void _spin_unlock_recursive(spinlock_t *lock); - * are any critical regions that cannot form part of such a set, they can use - * standard spin_[un]lock(). - */ --#define spin_trylock_recursive(l) _spin_trylock_recursive(l) --#define spin_lock_recursive(l) _spin_lock_recursive(l) -+#define spin_trylock_recursive(l) \ -+ lock_evaluate_nospec(_spin_trylock_recursive(l)) -+ -+static always_inline void spin_lock_recursive(spinlock_t *l) -+{ -+ _spin_lock_recursive(l); -+ block_lock_speculation(); -+} -+ - #define spin_unlock_recursive(l) _spin_unlock_recursive(l) - - #endif /* __SPINLOCK_H__ */ diff --git a/xsa453-4.18-3.patch b/xsa453-4.18-3.patch deleted file mode 100644 index ecaff41..0000000 --- a/xsa453-4.18-3.patch +++ /dev/null @@ -1,113 +0,0 @@ -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Subject: rwlock: introduce support for blocking speculation into critical - regions - -Introduce inline wrappers as required and add direct calls to -block_lock_speculation() in order to prevent speculation into the rwlock -protected critical regions. 
- -Note the rwlock primitives are adjusted to use the non speculation safe variants -of the spinlock handlers, as a speculation barrier is added in the rwlock -calling wrappers. - -trylock variants are protected by using lock_evaluate_nospec(). - -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -(cherry picked from commit a1fb15f61692b1fa9945fc51f55471ace49cdd59) - -diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c -index 18224a4bb5d6..290602936df6 100644 ---- a/xen/common/rwlock.c -+++ b/xen/common/rwlock.c -@@ -34,8 +34,11 @@ void queue_read_lock_slowpath(rwlock_t *lock) - - /* - * Put the reader into the wait queue. -+ * -+ * Use the speculation unsafe helper, as it's the caller responsibility to -+ * issue a speculation barrier if required. - */ -- spin_lock(&lock->lock); -+ _spin_lock(&lock->lock); - - /* - * At the head of the wait queue now, wait until the writer state -@@ -66,8 +69,13 @@ void queue_write_lock_slowpath(rwlock_t *lock) - { - u32 cnts; - -- /* Put the writer into the wait queue. */ -- spin_lock(&lock->lock); -+ /* -+ * Put the writer into the wait queue. -+ * -+ * Use the speculation unsafe helper, as it's the caller responsibility to -+ * issue a speculation barrier if required. -+ */ -+ _spin_lock(&lock->lock); - - /* Try to acquire the lock directly if no reader is present. */ - if ( !atomic_read(&lock->cnts) && -diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h -index e0d2b41c5c7e..9a0d3ec23847 100644 ---- a/xen/include/xen/rwlock.h -+++ b/xen/include/xen/rwlock.h -@@ -259,27 +259,49 @@ static inline int _rw_is_write_locked(const rwlock_t *lock) - return (atomic_read(&lock->cnts) & _QW_WMASK) == _QW_LOCKED; - } - --#define read_lock(l) _read_lock(l) --#define read_lock_irq(l) _read_lock_irq(l) -+static always_inline void read_lock(rwlock_t *l) -+{ -+ _read_lock(l); -+ block_lock_speculation(); -+} -+ -+static always_inline void read_lock_irq(rwlock_t *l) -+{ -+ _read_lock_irq(l); -+ block_lock_speculation(); -+} -+ - #define read_lock_irqsave(l, f) \ - ({ \ - BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \ - ((f) = _read_lock_irqsave(l)); \ -+ block_lock_speculation(); \ - }) - - #define read_unlock(l) _read_unlock(l) - #define read_unlock_irq(l) _read_unlock_irq(l) - #define read_unlock_irqrestore(l, f) _read_unlock_irqrestore(l, f) --#define read_trylock(l) _read_trylock(l) -+#define read_trylock(l) lock_evaluate_nospec(_read_trylock(l)) -+ -+static always_inline void write_lock(rwlock_t *l) -+{ -+ _write_lock(l); -+ block_lock_speculation(); -+} -+ -+static always_inline void write_lock_irq(rwlock_t *l) -+{ -+ _write_lock_irq(l); -+ block_lock_speculation(); -+} - --#define write_lock(l) _write_lock(l) --#define write_lock_irq(l) _write_lock_irq(l) - #define write_lock_irqsave(l, f) \ - ({ \ - BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \ - ((f) = _write_lock_irqsave(l)); \ -+ block_lock_speculation(); \ - }) --#define write_trylock(l) _write_trylock(l) -+#define write_trylock(l) lock_evaluate_nospec(_write_trylock(l)) - - #define write_unlock(l) _write_unlock(l) - #define write_unlock_irq(l) _write_unlock_irq(l) diff --git a/xsa453-4.18-4.patch b/xsa453-4.18-4.patch deleted file mode 100644 index a3c6b3c..0000000 --- a/xsa453-4.18-4.patch +++ /dev/null @@ -1,75 +0,0 @@ -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Subject: percpu-rwlock: introduce support for blocking speculation into - critical regions - -Add direct calls to block_lock_speculation() where required in order to 
prevent -speculation into the lock protected critical regions. Also convert -_percpu_read_lock() from inline to always_inline. - -Note that _percpu_write_lock() has been modified the use the non speculation -safe of the locking primites, as a speculation is added unconditionally by the -calling wrapper. - -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -(cherry picked from commit f218daf6d3a3b847736d37c6a6b76031a0d08441) - -diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c -index 290602936df6..f5a249bcc240 100644 ---- a/xen/common/rwlock.c -+++ b/xen/common/rwlock.c -@@ -129,8 +129,12 @@ void _percpu_write_lock(percpu_rwlock_t **per_cpudata, - /* - * First take the write lock to protect against other writers or slow - * path readers. -+ * -+ * Note we use the speculation unsafe variant of write_lock(), as the -+ * calling wrapper already adds a speculation barrier after the lock has -+ * been taken. - */ -- write_lock(&percpu_rwlock->rwlock); -+ _write_lock(&percpu_rwlock->rwlock); - - /* Now set the global variable so that readers start using read_lock. */ - percpu_rwlock->writer_activating = 1; -diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h -index 9a0d3ec23847..9e35ee2edf8f 100644 ---- a/xen/include/xen/rwlock.h -+++ b/xen/include/xen/rwlock.h -@@ -338,8 +338,8 @@ static inline void _percpu_rwlock_owner_check(percpu_rwlock_t **per_cpudata, - #define percpu_rwlock_resource_init(l, owner) \ - (*(l) = (percpu_rwlock_t)PERCPU_RW_LOCK_UNLOCKED(&get_per_cpu_var(owner))) - --static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata, -- percpu_rwlock_t *percpu_rwlock) -+static always_inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata, -+ percpu_rwlock_t *percpu_rwlock) - { - /* Validate the correct per_cpudata variable has been provided. */ - _percpu_rwlock_owner_check(per_cpudata, percpu_rwlock); -@@ -374,6 +374,8 @@ static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata, - } - else - { -+ /* Other branch already has a speculation barrier in read_lock(). */ -+ block_lock_speculation(); - /* All other paths have implicit check_lock() calls via read_lock(). */ - check_lock(&percpu_rwlock->rwlock.lock.debug, false); - } -@@ -430,8 +432,12 @@ static inline void _percpu_write_unlock(percpu_rwlock_t **per_cpudata, - _percpu_read_lock(&get_per_cpu_var(percpu), lock) - #define percpu_read_unlock(percpu, lock) \ - _percpu_read_unlock(&get_per_cpu_var(percpu), lock) --#define percpu_write_lock(percpu, lock) \ -- _percpu_write_lock(&get_per_cpu_var(percpu), lock) -+ -+#define percpu_write_lock(percpu, lock) \ -+({ \ -+ _percpu_write_lock(&get_per_cpu_var(percpu), lock); \ -+ block_lock_speculation(); \ -+}) - #define percpu_write_unlock(percpu, lock) \ - _percpu_write_unlock(&get_per_cpu_var(percpu), lock) - diff --git a/xsa453-4.18-5.patch b/xsa453-4.18-5.patch deleted file mode 100644 index aab46e3..0000000 --- a/xsa453-4.18-5.patch +++ /dev/null @@ -1,382 +0,0 @@ -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Subject: locking: attempt to ensure lock wrappers are always inline - -In order to prevent the locking speculation barriers from being inside of -`call`ed functions that could be speculatively bypassed. - -While there also add an extra locking barrier to _mm_write_lock() in the branch -taken when the lock is already held. 
- -Note some functions are switched to use the unsafe variants (without speculation -barrier) of the locking primitives, but a speculation barrier is always added -to the exposed public lock wrapping helper. That's the case with -sched_spin_lock_double() or pcidevs_lock() for example. - -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -(cherry picked from commit 197ecd838a2aaf959a469df3696d4559c4f8b762) - -diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c -index 8f53e88d6706..e1d6845a2844 100644 ---- a/xen/arch/x86/hvm/vpt.c -+++ b/xen/arch/x86/hvm/vpt.c -@@ -150,7 +150,7 @@ static int pt_irq_masked(struct periodic_time *pt) - * pt->vcpu field, because another thread holding the pt_migrate lock - * may already be spinning waiting for your vcpu lock. - */ --static void pt_vcpu_lock(struct vcpu *v) -+static always_inline void pt_vcpu_lock(struct vcpu *v) - { - spin_lock(&v->arch.hvm.tm_lock); - } -@@ -169,9 +169,13 @@ static void pt_vcpu_unlock(struct vcpu *v) - * need to take an additional lock that protects against pt->vcpu - * changing. - */ --static void pt_lock(struct periodic_time *pt) -+static always_inline void pt_lock(struct periodic_time *pt) - { -- read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); -+ /* -+ * Use the speculation unsafe variant for the first lock, as the following -+ * lock taking helper already includes a speculation barrier. -+ */ -+ _read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); - spin_lock(&pt->vcpu->arch.hvm.tm_lock); - } - -diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h -index a87af47ece22..465ab39bb041 100644 ---- a/xen/arch/x86/include/asm/irq.h -+++ b/xen/arch/x86/include/asm/irq.h -@@ -174,6 +174,7 @@ void cf_check irq_complete_move(struct irq_desc *desc); - - extern struct irq_desc *irq_desc; - -+/* Not speculation safe, only used for AP bringup. 
*/ - void lock_vector_lock(void); - void unlock_vector_lock(void); - -diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h -index 5a3f96fbaadd..5ec080c02fd8 100644 ---- a/xen/arch/x86/mm/mm-locks.h -+++ b/xen/arch/x86/mm/mm-locks.h -@@ -74,8 +74,8 @@ static inline void _set_lock_level(int l) - this_cpu(mm_lock_level) = l; - } - --static inline void _mm_lock(const struct domain *d, mm_lock_t *l, -- const char *func, int level, int rec) -+static always_inline void _mm_lock(const struct domain *d, mm_lock_t *l, -+ const char *func, int level, int rec) - { - if ( !((mm_locked_by_me(l)) && rec) ) - _check_lock_level(d, level); -@@ -125,8 +125,8 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l) - return (l->locker == get_processor_id()); - } - --static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l, -- const char *func, int level) -+static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l, -+ const char *func, int level) - { - if ( !mm_write_locked_by_me(l) ) - { -@@ -137,6 +137,8 @@ static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l, - l->unlock_level = _get_lock_level(); - _set_lock_level(_lock_level(d, level)); - } -+ else -+ block_speculation(); - l->recurse_count++; - } - -@@ -150,8 +152,8 @@ static inline void mm_write_unlock(mm_rwlock_t *l) - percpu_write_unlock(p2m_percpu_rwlock, &l->lock); - } - --static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l, -- int level) -+static always_inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l, -+ int level) - { - _check_lock_level(d, level); - percpu_read_lock(p2m_percpu_rwlock, &l->lock); -@@ -166,15 +168,15 @@ static inline void mm_read_unlock(mm_rwlock_t *l) - - /* This wrapper uses the line number to express the locking order below */ - #define declare_mm_lock(name) \ -- static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l, \ -- const char *func, int rec) \ -+ static always_inline void mm_lock_##name( \ -+ const struct domain *d, mm_lock_t *l, const char *func, int rec) \ - { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); } - #define declare_mm_rwlock(name) \ -- static inline void mm_write_lock_##name(const struct domain *d, \ -- mm_rwlock_t *l, const char *func) \ -+ static always_inline void mm_write_lock_##name( \ -+ const struct domain *d, mm_rwlock_t *l, const char *func) \ - { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); } \ -- static inline void mm_read_lock_##name(const struct domain *d, \ -- mm_rwlock_t *l) \ -+ static always_inline void mm_read_lock_##name(const struct domain *d, \ -+ mm_rwlock_t *l) \ - { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); } - /* These capture the name of the calling function */ - #define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0) -@@ -309,7 +311,7 @@ declare_mm_lock(altp2mlist) - #define MM_LOCK_ORDER_altp2m 40 - declare_mm_rwlock(altp2m); - --static inline void p2m_lock(struct p2m_domain *p) -+static always_inline void p2m_lock(struct p2m_domain *p) - { - if ( p2m_is_altp2m(p) ) - mm_write_lock(altp2m, p->domain, &p->lock); -diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c -index 9969eb45fa8c..9be67b63ce3e 100644 ---- a/xen/arch/x86/mm/p2m-pod.c -+++ b/xen/arch/x86/mm/p2m-pod.c -@@ -24,7 +24,7 @@ - #define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0) - - /* Enforce lock ordering when grabbing the "external" page_alloc lock */ --static inline void lock_page_alloc(struct p2m_domain *p2m) -+static always_inline void 
lock_page_alloc(struct p2m_domain *p2m) - { - page_alloc_mm_pre_lock(p2m->domain); - spin_lock(&(p2m->domain->page_alloc_lock)); -diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c -index a7a004a08429..66f924a7b091 100644 ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -45,7 +45,7 @@ - * just assume the event channel is free or unbound at the moment when the - * evtchn_read_trylock() returns false. - */ --static inline void evtchn_write_lock(struct evtchn *evtchn) -+static always_inline void evtchn_write_lock(struct evtchn *evtchn) - { - write_lock(&evtchn->lock); - -@@ -351,7 +351,8 @@ int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc, evtchn_port_t port) - return rc; - } - --static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn) -+static always_inline void double_evtchn_lock(struct evtchn *lchn, -+ struct evtchn *rchn) - { - ASSERT(lchn != rchn); - -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index 89b7811c51c3..934924cbda66 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -403,7 +403,7 @@ static inline void act_set_gfn(struct active_grant_entry *act, gfn_t gfn) - - static DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock); - --static inline void grant_read_lock(struct grant_table *gt) -+static always_inline void grant_read_lock(struct grant_table *gt) - { - percpu_read_lock(grant_rwlock, >->lock); - } -@@ -413,7 +413,7 @@ static inline void grant_read_unlock(struct grant_table *gt) - percpu_read_unlock(grant_rwlock, >->lock); - } - --static inline void grant_write_lock(struct grant_table *gt) -+static always_inline void grant_write_lock(struct grant_table *gt) - { - percpu_write_lock(grant_rwlock, >->lock); - } -@@ -450,7 +450,7 @@ nr_active_grant_frames(struct grant_table *gt) - return num_act_frames_from_sha_frames(nr_grant_frames(gt)); - } - --static inline struct active_grant_entry * -+static always_inline struct active_grant_entry * - active_entry_acquire(struct grant_table *t, grant_ref_t e) - { - struct active_grant_entry *act; -diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c -index 901782bbb416..34ad39b9ad0b 100644 ---- a/xen/common/sched/core.c -+++ b/xen/common/sched/core.c -@@ -348,23 +348,28 @@ uint64_t get_cpu_idle_time(unsigned int cpu) - * This avoids dead- or live-locks when this code is running on both - * cpus at the same time. - */ --static void sched_spin_lock_double(spinlock_t *lock1, spinlock_t *lock2, -- unsigned long *flags) -+static always_inline void sched_spin_lock_double( -+ spinlock_t *lock1, spinlock_t *lock2, unsigned long *flags) - { -+ /* -+ * In order to avoid extra overhead, use the locking primitives without the -+ * speculation barrier, and introduce a single barrier here. 
-+ */ - if ( lock1 == lock2 ) - { -- spin_lock_irqsave(lock1, *flags); -+ *flags = _spin_lock_irqsave(lock1); - } - else if ( lock1 < lock2 ) - { -- spin_lock_irqsave(lock1, *flags); -- spin_lock(lock2); -+ *flags = _spin_lock_irqsave(lock1); -+ _spin_lock(lock2); - } - else - { -- spin_lock_irqsave(lock2, *flags); -- spin_lock(lock1); -+ *flags = _spin_lock_irqsave(lock2); -+ _spin_lock(lock1); - } -+ block_lock_speculation(); - } - - static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2, -diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h -index c516976c3740..3b97f1576782 100644 ---- a/xen/common/sched/private.h -+++ b/xen/common/sched/private.h -@@ -207,8 +207,24 @@ DECLARE_PER_CPU(cpumask_t, cpumask_scratch); - #define cpumask_scratch (&this_cpu(cpumask_scratch)) - #define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c)) - -+/* -+ * Deal with _spin_lock_irqsave() returning the flags value instead of storing -+ * it in a passed parameter. -+ */ -+#define _sched_spinlock0(lock, irq) _spin_lock##irq(lock) -+#define _sched_spinlock1(lock, irq, arg) ({ \ -+ BUILD_BUG_ON(sizeof(arg) != sizeof(unsigned long)); \ -+ (arg) = _spin_lock##irq(lock); \ -+}) -+ -+#define _sched_spinlock__(nr) _sched_spinlock ## nr -+#define _sched_spinlock_(nr) _sched_spinlock__(nr) -+#define _sched_spinlock(lock, irq, args...) \ -+ _sched_spinlock_(count_args(args))(lock, irq, ## args) -+ - #define sched_lock(kind, param, cpu, irq, arg...) \ --static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ -+static always_inline spinlock_t \ -+*kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ - { \ - for ( ; ; ) \ - { \ -@@ -220,10 +236,16 @@ static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ - * \ - * It may also be the case that v->processor may change but the \ - * lock may be the same; this will succeed in that case. \ -+ * \ -+ * Use the speculation unsafe locking helper, there's a speculation \ -+ * barrier before returning to the caller. \ - */ \ -- spin_lock##irq(lock, ## arg); \ -+ _sched_spinlock(lock, irq, ## arg); \ - if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \ -+ { \ -+ block_lock_speculation(); \ - return lock; \ -+ } \ - spin_unlock##irq(lock, ## arg); \ - } \ - } -diff --git a/xen/common/timer.c b/xen/common/timer.c -index 0fddfa74879e..38eb5fd20d36 100644 ---- a/xen/common/timer.c -+++ b/xen/common/timer.c -@@ -239,7 +239,7 @@ static inline void deactivate_timer(struct timer *timer) - list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive); - } - --static inline bool_t timer_lock(struct timer *timer) -+static inline bool_t timer_lock_unsafe(struct timer *timer) - { - unsigned int cpu; - -@@ -253,7 +253,8 @@ static inline bool_t timer_lock(struct timer *timer) - rcu_read_unlock(&timer_cpu_read_lock); - return 0; - } -- spin_lock(&per_cpu(timers, cpu).lock); -+ /* Use the speculation unsafe variant, the wrapper has the barrier. 
*/ -+ _spin_lock(&per_cpu(timers, cpu).lock); - if ( likely(timer->cpu == cpu) ) - break; - spin_unlock(&per_cpu(timers, cpu).lock); -@@ -266,8 +267,9 @@ static inline bool_t timer_lock(struct timer *timer) - #define timer_lock_irqsave(t, flags) ({ \ - bool_t __x; \ - local_irq_save(flags); \ -- if ( !(__x = timer_lock(t)) ) \ -+ if ( !(__x = timer_lock_unsafe(t)) ) \ - local_irq_restore(flags); \ -+ block_lock_speculation(); \ - __x; \ - }) - -diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c -index e99837b6e141..2a1e7ee89a5d 100644 ---- a/xen/drivers/passthrough/pci.c -+++ b/xen/drivers/passthrough/pci.c -@@ -52,9 +52,10 @@ struct pci_seg { - - static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED; - --void pcidevs_lock(void) -+/* Do not use, as it has no speculation barrier, use pcidevs_lock() instead. */ -+void pcidevs_lock_unsafe(void) - { -- spin_lock_recursive(&_pcidevs_lock); -+ _spin_lock_recursive(&_pcidevs_lock); - } - - void pcidevs_unlock(void) -diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h -index 8e509e078475..f1472ea1ebe5 100644 ---- a/xen/include/xen/event.h -+++ b/xen/include/xen/event.h -@@ -114,12 +114,12 @@ void notify_via_xen_event_channel(struct domain *ld, int lport); - #define bucket_from_port(d, p) \ - ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET]) - --static inline void evtchn_read_lock(struct evtchn *evtchn) -+static always_inline void evtchn_read_lock(struct evtchn *evtchn) - { - read_lock(&evtchn->lock); - } - --static inline bool evtchn_read_trylock(struct evtchn *evtchn) -+static always_inline bool evtchn_read_trylock(struct evtchn *evtchn) - { - return read_trylock(&evtchn->lock); - } -diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h -index 251b8761a8e9..a71bed36be29 100644 ---- a/xen/include/xen/pci.h -+++ b/xen/include/xen/pci.h -@@ -155,8 +155,12 @@ struct pci_dev { - * devices, it also sync the access to the msi capability that is not - * interrupt handling related (the mask bit register). - */ -- --void pcidevs_lock(void); -+void pcidevs_lock_unsafe(void); -+static always_inline void pcidevs_lock(void) -+{ -+ pcidevs_lock_unsafe(); -+ block_lock_speculation(); -+} - void pcidevs_unlock(void); - bool __must_check pcidevs_locked(void); - diff --git a/xsa453-4.18-6.patch b/xsa453-4.18-6.patch deleted file mode 100644 index e8104c5..0000000 --- a/xsa453-4.18-6.patch +++ /dev/null @@ -1,61 +0,0 @@ -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Subject: x86/mm: add speculation barriers to open coded locks - -Add a speculation barrier to the clearly identified open-coded lock taking -functions. - -Note that the memory sharing page_lock() replacement (_page_lock()) is left -as-is, as the code is experimental and not security supported. - -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -(cherry picked from commit 42a572a38e22a97d86a4b648a22597628d5b42e4) - -diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h -index 05dfe35502c8..d1b1fee99b7d 100644 ---- a/xen/arch/x86/include/asm/mm.h -+++ b/xen/arch/x86/include/asm/mm.h -@@ -399,7 +399,9 @@ const struct platform_bad_page *get_platform_badpages(unsigned int *array_size); - * The use of PGT_locked in mem_sharing does not collide, since mem_sharing is - * only supported for hvm guests, which do not have PV PTEs updated. 
- */ --int page_lock(struct page_info *page); -+int page_lock_unsafe(struct page_info *page); -+#define page_lock(pg) lock_evaluate_nospec(page_lock_unsafe(pg)) -+ - void page_unlock(struct page_info *page); - - void put_page_type(struct page_info *page); -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index ab0acbfea6e5..000fd0fb558b 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2017,7 +2017,7 @@ static inline bool current_locked_page_ne_check(struct page_info *page) { - #define current_locked_page_ne_check(x) true - #endif - --int page_lock(struct page_info *page) -+int page_lock_unsafe(struct page_info *page) - { - unsigned long x, nx; - -@@ -2078,7 +2078,7 @@ void page_unlock(struct page_info *page) - * l3t_lock(), so to avoid deadlock we must avoid grabbing them in - * reverse order. - */ --static void l3t_lock(struct page_info *page) -+static always_inline void l3t_lock(struct page_info *page) - { - unsigned long x, nx; - -@@ -2087,6 +2087,8 @@ static void l3t_lock(struct page_info *page) - cpu_relax(); - nx = x | PGT_locked; - } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x ); -+ -+ block_lock_speculation(); - } - - static void l3t_unlock(struct page_info *page) diff --git a/xsa453-4.18-7.patch b/xsa453-4.18-7.patch deleted file mode 100644 index 48f0bf9..0000000 --- a/xsa453-4.18-7.patch +++ /dev/null @@ -1,201 +0,0 @@ -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= -Subject: x86: protect conditional lock taking from speculative execution - -Conditionally taken locks that use the pattern: - -if ( lock ) - spin_lock(...); - -Need an else branch in order to issue an speculation barrier in the else case, -just like it's done in case the lock needs to be acquired. - -eval_nospec() could be used on the condition itself, but that would result in a -double barrier on the branch where the lock is taken. - -Introduce a new pair of helpers, {gfn,spin}_lock_if() that can be used to -conditionally take a lock in a speculation safe way. 
- -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné -Reviewed-by: Jan Beulich -(cherry picked from commit 03cf7ca23e0e876075954c558485b267b7d02406) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 000fd0fb558b..45bfbc2522f7 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -5007,8 +5007,7 @@ static l3_pgentry_t *virt_to_xen_l3e(unsigned long v) - if ( !l3t ) - return NULL; - UNMAP_DOMAIN_PAGE(l3t); -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) ) - { - l4_pgentry_t l4e = l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR); -@@ -5045,8 +5044,7 @@ static l2_pgentry_t *virt_to_xen_l2e(unsigned long v) - return NULL; - } - UNMAP_DOMAIN_PAGE(l2t); -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) - { - l3e_write(pl3e, l3e_from_mfn(l2mfn, __PAGE_HYPERVISOR)); -@@ -5084,8 +5082,7 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v) - return NULL; - } - UNMAP_DOMAIN_PAGE(l1t); -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) - { - l2e_write(pl2e, l2e_from_mfn(l1mfn, __PAGE_HYPERVISOR)); -@@ -5116,6 +5113,8 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v) - do { \ - if ( locking ) \ - l3t_lock(page); \ -+ else \ -+ block_lock_speculation(); \ - } while ( false ) - - #define L3T_UNLOCK(page) \ -@@ -5331,8 +5330,7 @@ int map_pages_to_xen( - if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL ) - flush_flags |= FLUSH_TLB_GLOBAL; - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) && - (l3e_get_flags(*pl3e) & _PAGE_PSE) ) - { -@@ -5436,8 +5434,7 @@ int map_pages_to_xen( - if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL ) - flush_flags |= FLUSH_TLB_GLOBAL; - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) && - (l2e_get_flags(*pl2e) & _PAGE_PSE) ) - { -@@ -5478,8 +5475,7 @@ int map_pages_to_xen( - unsigned long base_mfn; - const l1_pgentry_t *l1t; - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - - ol2e = *pl2e; - /* -@@ -5533,8 +5529,7 @@ int map_pages_to_xen( - unsigned long base_mfn; - const l2_pgentry_t *l2t; - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - - ol3e = *pl3e; - /* -@@ -5678,8 +5673,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - l3e_get_flags(*pl3e))); - UNMAP_DOMAIN_PAGE(l2t); - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) && - (l3e_get_flags(*pl3e) & _PAGE_PSE) ) - { -@@ -5738,8 +5732,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - l2e_get_flags(*pl2e) & ~_PAGE_PSE)); - UNMAP_DOMAIN_PAGE(l1t); - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) && - (l2e_get_flags(*pl2e) & _PAGE_PSE) ) - { -@@ -5783,8 +5776,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - */ - if ( (nf & _PAGE_PRESENT) || ((v != e) && (l1_table_offset(v) != 0)) ) - continue; -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - - /* - * L2E may be 
already cleared, or set to a superpage, by -@@ -5831,8 +5823,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - if ( (nf & _PAGE_PRESENT) || - ((v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0)) ) - continue; -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - - /* - * L3E may be already cleared, or set to a superpage, by -diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h -index 5ec080c02fd8..b4960fb90eff 100644 ---- a/xen/arch/x86/mm/mm-locks.h -+++ b/xen/arch/x86/mm/mm-locks.h -@@ -335,6 +335,15 @@ static inline void p2m_unlock(struct p2m_domain *p) - #define p2m_locked_by_me(p) mm_write_locked_by_me(&(p)->lock) - #define gfn_locked_by_me(p,g) p2m_locked_by_me(p) - -+static always_inline void gfn_lock_if(bool condition, struct p2m_domain *p2m, -+ gfn_t gfn, unsigned int order) -+{ -+ if ( condition ) -+ gfn_lock(p2m, gfn, order); -+ else -+ block_lock_speculation(); -+} -+ - /* PoD lock (per-p2m-table) - * - * Protects private PoD data structs: entry and cache -diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c -index 0983bd71d9a9..22ab1d606e8a 100644 ---- a/xen/arch/x86/mm/p2m.c -+++ b/xen/arch/x86/mm/p2m.c -@@ -280,9 +280,8 @@ mfn_t p2m_get_gfn_type_access(struct p2m_domain *p2m, gfn_t gfn, - if ( q & P2M_UNSHARE ) - q |= P2M_ALLOC; - -- if ( locked ) -- /* Grab the lock here, don't release until put_gfn */ -- gfn_lock(p2m, gfn, 0); -+ /* Grab the lock here, don't release until put_gfn */ -+ gfn_lock_if(locked, p2m, gfn, 0); - - mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL); - -diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h -index 28fce5615e5c..c830df3430a3 100644 ---- a/xen/include/xen/spinlock.h -+++ b/xen/include/xen/spinlock.h -@@ -222,6 +222,14 @@ static always_inline void spin_lock_irq(spinlock_t *l) - block_lock_speculation(); \ - }) - -+/* Conditionally take a spinlock in a speculation safe way. */ -+static always_inline void spin_lock_if(bool condition, spinlock_t *l) -+{ -+ if ( condition ) -+ _spin_lock(l); -+ block_lock_speculation(); -+} -+ - #define spin_unlock(l) _spin_unlock(l) - #define spin_unlock_irq(l) _spin_unlock_irq(l) - #define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f)
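
The xsa453-4.18-7 patch dropped above (its content now ships in the 4.18.1
tarball) boils down to one pattern: a conditionally taken lock must fence
speculation on both branches, not just the branch that takes the lock.  A
minimal user-space sketch of that idea follows; it assumes a pthread mutex in
place of Xen's spinlock_t, and the helper names lock_if() and barrier_nospec()
are illustrative stand-ins for spin_lock_if() and block_lock_speculation(),
not the hypervisor code itself.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for block_lock_speculation(): serialise speculative execution. */
static inline void barrier_nospec(void)
{
#if defined(__x86_64__) || defined(__i386__)
    __asm__ __volatile__ ( "lfence" ::: "memory" );
#else
    __asm__ __volatile__ ( "" ::: "memory" );   /* compiler barrier only */
#endif
}

/* Take the lock only when 'condition' holds; fence speculation either way. */
static inline void lock_if(bool condition, pthread_mutex_t *l)
{
    if ( condition )
        pthread_mutex_lock(l);
    barrier_nospec();
}

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;

int main(void)
{
    bool need_lock = true;

    lock_if(need_lock, &m);    /* cf. spin_lock_if(locking, &map_pgdir_lock) */
    puts("inside the speculation-fenced critical section");
    if ( need_lock )
        pthread_mutex_unlock(&m);

    return 0;
}

Putting the barrier inside the helper, rather than wrapping the condition in
eval_nospec(), avoids a double fence on the branch that actually takes the
lock, which is the trade-off the commit message above describes.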
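
The other recurring shape in the removed locking patches
(sched_spin_lock_double(), timer_lock_irqsave(), pcidevs_lock()) is to take
the raw, barrier-less primitives and issue a single speculation barrier once
every lock is held, rather than paying one fence per acquisition.  A sketch
under the same assumptions as above (pthread mutexes, a hypothetical
barrier_nospec() standing in for block_lock_speculation(); lock_double() is an
invented name):

#include <pthread.h>
#include <stdint.h>

static inline void barrier_nospec(void)
{
#if defined(__x86_64__) || defined(__i386__)
    __asm__ __volatile__ ( "lfence" ::: "memory" );
#else
    __asm__ __volatile__ ( "" ::: "memory" );
#endif
}

/*
 * Take both locks in a fixed (address) order so two CPUs locking the same
 * pair cannot deadlock, then fence once: a single barrier covers every
 * acquisition, mirroring sched_spin_lock_double().
 */
static inline void lock_double(pthread_mutex_t *l1, pthread_mutex_t *l2)
{
    if ( l1 == l2 )
        pthread_mutex_lock(l1);
    else if ( (uintptr_t)l1 < (uintptr_t)l2 )
    {
        pthread_mutex_lock(l1);
        pthread_mutex_lock(l2);
    }
    else
    {
        pthread_mutex_lock(l2);
        pthread_mutex_lock(l1);
    }
    barrier_nospec();
}

static pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

int main(void)
{
    lock_double(&a, &b);
    /* ... touch state guarded by either lock ... */
    pthread_mutex_unlock(&b);
    pthread_mutex_unlock(&a);
    return 0;
}

Unlocking needs no barrier: the fence only has to stop speculation from
running protected code before the lock is really held.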