From 136d0a1506e43791ffe0752208799f626918130f Mon Sep 17 00:00:00 2001 From: Michael Young Date: Sep 17 2022 15:52:46 +0000 Subject: update to xen-4.16.2 --- diff --git a/.gitignore b/.gitignore index 8f4e85c..f41c7f1 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,4 @@ lwip-1.3.0.tar.gz pciutils-2.2.9.tar.bz2 zlib-1.2.3.tar.gz polarssl-1.1.4-gpl.tgz -/xen-4.16.1.tar.gz +/xen-4.16.2.tar.gz diff --git a/sources b/sources index 6a26e23..f6803eb 100644 --- a/sources +++ b/sources @@ -4,4 +4,4 @@ SHA512 (newlib-1.16.0.tar.gz) = 40eb96bbc6736a16b6399e0cdb73e853d0d90b685c967e77 SHA512 (zlib-1.2.3.tar.gz) = 021b958fcd0d346c4ba761bcf0cc40f3522de6186cf5a0a6ea34a70504ce9622b1c2626fce40675bc8282cf5f5ade18473656abc38050f72f5d6480507a2106e SHA512 (polarssl-1.1.4-gpl.tgz) = 88da614e4d3f4409c4fd3bb3e44c7587ba051e3fed4e33d526069a67e8180212e1ea22da984656f50e290049f60ddca65383e5983c0f8884f648d71f698303ad SHA512 (pciutils-2.2.9.tar.bz2) = 2b3d98d027e46d8c08037366dde6f0781ca03c610ef2b380984639e4ef39899ed8d8b8e4cd9c9dc54df101279b95879bd66bfd4d04ad07fef41e847ea7ae32b5 -SHA512 (xen-4.16.1.tar.gz) = eeabba9c263cd2425bca083e32b5ebfc6c716c00553759c144fd4b6f64a89836b260787fa25ba22c1f5c4ea65aaad7c95b8c2c1070d3377b1c43c9517aa7032a +SHA512 (xen-4.16.2.tar.gz) = b6cd036c1073798dffa167ca14c954fbdfb4c0ef99662f7c435e7e5de687d1bde8856ff6bd030d0d2e661bd17ab631551f01b2cc728cad7e70b59aaa6e692783 diff --git a/xen.git-09d533f4c80b7eaf9fb4e36ebba8259580857a9d.patch b/xen.git-09d533f4c80b7eaf9fb4e36ebba8259580857a9d.patch deleted file mode 100644 index 1285967..0000000 --- a/xen.git-09d533f4c80b7eaf9fb4e36ebba8259580857a9d.patch +++ /dev/null @@ -1,93 +0,0 @@ -From 09d533f4c80b7eaf9fb4e36ebba8259580857a9d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper -Date: Tue, 12 Jul 2022 11:12:46 +0200 -Subject: [PATCH] x86/spec-ctrl: Only adjust MSR_SPEC_CTRL for idle with legacy - IBRS -MIME-Version: 1.0 -Content-Type: text/plain; charset=utf8 -Content-Transfer-Encoding: 8bit - -Back at the time of the original Spectre-v2 fixes, it was recommended to clear -MSR_SPEC_CTRL when going idle. This is because of the side effects on the -sibling thread caused by the microcode IBRS and STIBP implementations which -were retrofitted to existing CPUs. - -However, there are no relevant cross-thread impacts for the hardware -IBRS/STIBP implementations, so this logic should not be used on Intel CPUs -supporting eIBRS, or any AMD CPUs; doing so only adds unnecessary latency to -the idle path. - -Furthermore, there's no point playing with MSR_SPEC_CTRL in the idle paths if -SMT is disabled for other reasons. - -Fixes: 8d03080d2a33 ("x86/spec-ctrl: Cease using thunk=lfence on AMD") -Signed-off-by: Andrew Cooper -Reviewed-by: Roger Pau Monné -master commit: ffc7694e0c99eea158c32aa164b7d1e1bb1dc46b -master date: 2022-06-30 18:07:13 +0100 ---- - xen/arch/x86/spec_ctrl.c | 10 ++++++++-- - xen/include/asm-x86/cpufeatures.h | 2 +- - xen/include/asm-x86/spec_ctrl.h | 5 +++-- - 3 files changed, 12 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 099113ba41..1ed5ceda8b 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1150,8 +1150,14 @@ void __init init_speculation_mitigations(void) - /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */ - init_shadow_spec_ctrl_state(); - -- /* If Xen is using any MSR_SPEC_CTRL settings, adjust the idle path. */ -- if ( default_xen_spec_ctrl ) -+ /* -+ * For microcoded IBRS only (i.e. 
Intel, pre eIBRS), it is recommended to -+ * clear MSR_SPEC_CTRL before going idle, to avoid impacting sibling -+ * threads. Activate this if SMT is enabled, and Xen is using a non-zero -+ * MSR_SPEC_CTRL setting. -+ */ -+ if ( boot_cpu_has(X86_FEATURE_IBRSB) && !(caps & ARCH_CAPS_IBRS_ALL) && -+ hw_smt_enabled && default_xen_spec_ctrl ) - setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE); - - xpti_init_default(caps); -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index bd45a144ee..493d338a08 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -33,7 +33,7 @@ XEN_CPUFEATURE(SC_MSR_HVM, X86_SYNTH(17)) /* MSR_SPEC_CTRL used by Xen fo - XEN_CPUFEATURE(SC_RSB_PV, X86_SYNTH(18)) /* RSB overwrite needed for PV */ - XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM */ - XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ --XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */ -+XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* Clear MSR_SPEC_CTRL on idle */ - XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ - /* Bits 23,24 unused. */ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 751355f471..7e83e0179f 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -78,7 +78,8 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) - uint32_t val = 0; - - /* -- * Branch Target Injection: -+ * It is recommended in some cases to clear MSR_SPEC_CTRL when going idle, -+ * to avoid impacting sibling threads. - * - * Latch the new shadow value, then enable shadowing, then update the MSR. - * There are no SMP issues here; only local processor ordering concerns. -@@ -114,7 +115,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) - uint32_t val = info->xen_spec_ctrl; - - /* -- * Branch Target Injection: -+ * Restore MSR_SPEC_CTRL on exit from idle. - * - * Disable shadowing before updating the MSR. There are no SMP issues - * here; only local processor ordering concerns. --- -2.30.2 - diff --git a/xen.git-b378ee56c7e0bb5eeb35dcc55b3d29e5f50eb566.patch b/xen.git-b378ee56c7e0bb5eeb35dcc55b3d29e5f50eb566.patch deleted file mode 100644 index 2f1b7f5..0000000 --- a/xen.git-b378ee56c7e0bb5eeb35dcc55b3d29e5f50eb566.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Jan Beulich -Date: Tue, 7 Jun 2022 11:58:16 +0000 (+0200) -Subject: VT-d: refuse to use IOMMU with reserved CAP.ND value -X-Git-Url: http://xenbits.xenproject.org/gitweb/?p=xen.git;a=commitdiff_plain;h=b378ee56c7e0bb5eeb35dcc55b3d29e5f50eb566 - -VT-d: refuse to use IOMMU with reserved CAP.ND value - -The field taking the value 7 (resulting in 18-bit DIDs when using the -calculation in cap_ndoms(), when the DID fields are only 16 bits wide) -is reserved. Instead of misbehaving in case we would encounter such an -IOMMU, refuse to use it. 
-
-Signed-off-by: Jan Beulich
-Reviewed-by: Roger Pau Monné
-Reviewed-by: Kevin Tian
-master commit: a1545fbf45c689aff39ce76a6eaa609d32ef72a7
-master date: 2022-04-20 10:54:26 +0200
----
-
-diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
-index 93dd8aa643..8975c1de61 100644
---- a/xen/drivers/passthrough/vtd/iommu.c
-+++ b/xen/drivers/passthrough/vtd/iommu.c
-@@ -1279,8 +1279,11 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
-
-     quirk_iommu_caps(iommu);
-
-+    nr_dom = cap_ndoms(iommu->cap);
-+
-     if ( cap_fault_reg_offset(iommu->cap) +
-          cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE ||
-+         ((nr_dom - 1) >> 16) /* I.e. cap.nd > 6 */ ||
-          ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE )
-     {
-         printk(XENLOG_ERR VTDPREFIX "IOMMU: unsupported\n");
-@@ -1305,7 +1308,6 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
-         vtd_ops.sync_cache = sync_cache;
-
-     /* allocate domain id bitmap */
--    nr_dom = cap_ndoms(iommu->cap);
-     iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom));
-     if ( !iommu->domid_bitmap )
-         return -ENOMEM;
diff --git a/xen.git-db6ca8176ccc4ff7dfe3c06969af9ebfab0d7b04.patch b/xen.git-db6ca8176ccc4ff7dfe3c06969af9ebfab0d7b04.patch
deleted file mode 100644
index 4d5bb71..0000000
--- a/xen.git-db6ca8176ccc4ff7dfe3c06969af9ebfab0d7b04.patch
+++ /dev/null
@@ -1,233 +0,0 @@
-From db6ca8176ccc4ff7dfe3c06969af9ebfab0d7b04 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Tue, 12 Jul 2022 11:13:33 +0200
-Subject: [PATCH] x86/spec-ctrl: Knobs for STIBP and PSFD, and follow hardware
- STIBP hint
-MIME-Version: 1.0
-Content-Type: text/plain; charset=utf8
-Content-Transfer-Encoding: 8bit
-
-STIBP and PSFD are slightly weird bits, because they're both implied by other
-bits in MSR_SPEC_CTRL. Add fine grain controls for them, and take the
-implications into account when setting IBRS/SSBD.
-
-Rearrange the IBPB text/variables/logic to keep all the MSR_SPEC_CTRL bits
-together, for consistency.
-
-However, AMD have a hardware hint CPUID bit recommending that STIBP be set
-unilaterally. This is advertised on Zen3, so follow the recommendation.
-Furthermore, in such cases, set STIBP behind the guest's back for now. This
-has negligible overhead for the guest, but saves a WRMSR on vmentry. This is
-the only default change.
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-Reviewed-by: Roger Pau Monné
-master commit: fef244b179c06fcdfa581f7d57fa6e578c49ff50
-master date: 2022-06-30 18:07:13 +0100
----
- docs/misc/xen-command-line.pandoc | 21 +++++++---
- xen/arch/x86/hvm/svm/vmcb.c       |  9 +++++
- xen/arch/x86/spec_ctrl.c          | 65 ++++++++++++++++++++++++++-----
- 3 files changed, 81 insertions(+), 14 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index a642e43476..46e9c58d35 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2234,8 +2234,9 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
-
- ### spec-ctrl (x86)
- > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>,
--> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu,
--> l1d-flush,branch-harden,srb-lock,unpriv-mmio}=<bool> ]`
-+> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
-+> eager-fpu,l1d-flush,branch-harden,srb-lock,
-+> unpriv-mmio}=<bool> ]`
-
- Controls for speculative execution sidechannel mitigations. By default, Xen
- will pick the most appropriate mitigations based on compiled in support,
- loaded microcode, and hardware details, and will virtualise appropriate
- mitigations for guests to use.
-@@ -2285,9 +2286,10 @@ On hardware supporting IBRS (Indirect Branch Restricted Speculation), the
- If Xen is not using IBRS itself, functionality is still set up so IBRS can be
- virtualised for guests.
-
--On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=`
--option can be used to force (the default) or prevent Xen from issuing branch
--prediction barriers on vcpu context switches.
-+On hardware supporting STIBP (Single Thread Indirect Branch Predictors), the
-+`stibp=` option can be used to force or prevent Xen using the feature itself.
-+By default, Xen will use STIBP when IBRS is in use (IBRS implies STIBP), and
-+when hardware hints recommend using it as a blanket setting.
-
- On hardware supporting SSBD (Speculative Store Bypass Disable), the `ssbd=`
- option can be used to force or prevent Xen using the feature itself. On AMD
-@@ -2295,6 +2297,15 @@ hardware, this is a global option applied at boot, and not virtualised for
- guest use. On Intel hardware, the feature is virtualised for guests,
- independently of Xen's choice of setting.
-
-+On hardware supporting PSFD (Predictive Store Forwarding Disable), the `psfd=`
-+option can be used to force or prevent Xen using the feature itself. By
-+default, Xen will not use PSFD. PSFD is implied by SSBD, and SSBD is off by
-+default.
-+
-+On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=`
-+option can be used to force (the default) or prevent Xen from issuing branch
-+prediction barriers on vcpu context switches.
-+
- On all hardware, the `eager-fpu=` option can be used to force or prevent Xen
- from using fully eager FPU context switches. This is currently implemented as
- a global control. By default, Xen will choose to use fully eager context
-diff --git a/xen/arch/x86/hvm/svm/vmcb.c b/xen/arch/x86/hvm/svm/vmcb.c
-index 565e997155..ef7224eb5d 100644
---- a/xen/arch/x86/hvm/svm/vmcb.c
-+++ b/xen/arch/x86/hvm/svm/vmcb.c
-@@ -29,6 +29,7 @@
- #include
- #include
- #include
-+#include
-
- struct vmcb_struct *alloc_vmcb(void)
- {
-@@ -176,6 +177,14 @@ static int construct_vmcb(struct vcpu *v)
-         vmcb->_pause_filter_thresh = SVM_PAUSETHRESH_INIT;
-     }
-
-+    /*
-+     * When default_xen_spec_ctrl is simply SPEC_CTRL_STIBP, default this
-+     * behind the back of the VM too. Our SMT topology isn't accurate, the
-+     * overhead is negligible, and doing this saves a WRMSR on the vmentry path.
-+     */
-+    if ( default_xen_spec_ctrl == SPEC_CTRL_STIBP )
-+        v->arch.msrs->spec_ctrl.raw = SPEC_CTRL_STIBP;
-+
-     return 0;
- }
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 1ed5ceda8b..dfdd45c358 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -48,9 +48,13 @@ static enum ind_thunk {
-     THUNK_LFENCE,
-     THUNK_JMP,
- } opt_thunk __initdata = THUNK_DEFAULT;
-+
- static int8_t __initdata opt_ibrs = -1;
-+int8_t __initdata opt_stibp = -1;
-+bool __read_mostly opt_ssbd;
-+int8_t __initdata opt_psfd = -1;
-+
- bool __read_mostly opt_ibpb = true;
--bool __read_mostly opt_ssbd = false;
- int8_t __read_mostly opt_eager_fpu = -1;
- int8_t __read_mostly opt_l1d_flush = -1;
- static bool __initdata opt_branch_harden = true;
-@@ -172,12 +176,20 @@ static int __init parse_spec_ctrl(const char *s)
-         else
-             rc = -EINVAL;
-     }
-+
-+    /* Bits in MSR_SPEC_CTRL. */
-     else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 )
-         opt_ibrs = val;
--    else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
--        opt_ibpb = val;
-+    else if ( (val = parse_boolean("stibp", s, ss)) >= 0 )
-+        opt_stibp = val;
-     else if ( (val = parse_boolean("ssbd", s, ss)) >= 0 )
-         opt_ssbd = val;
-+    else if ( (val = parse_boolean("psfd", s, ss)) >= 0 )
-+        opt_psfd = val;
-+
-+    /* Misc settings. */
-+    else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
-+        opt_ibpb = val;
-     else if ( (val = parse_boolean("eager-fpu", s, ss)) >= 0 )
-         opt_eager_fpu = val;
-     else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 )
-@@ -376,7 +388,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
-            "\n");
-
-     /* Settings for Xen's protection, irrespective of guests. */
--    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s, Other:%s%s%s%s%s\n",
-+    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s\n",
-            thunk == THUNK_NONE      ? "N/A" :
-            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
-            thunk == THUNK_LFENCE    ? "LFENCE" :
-@@ -390,6 +402,9 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
-            (!boot_cpu_has(X86_FEATURE_SSBD) &&
-             !boot_cpu_has(X86_FEATURE_AMD_SSBD))     ? "" :
-            (default_xen_spec_ctrl & SPEC_CTRL_SSBD)  ? " SSBD+" : " SSBD-",
-+           (!boot_cpu_has(X86_FEATURE_PSFD) &&
-+            !boot_cpu_has(X86_FEATURE_INTEL_PSFD))   ? "" :
-+           (default_xen_spec_ctrl & SPEC_CTRL_PSFD)  ? " PSFD+" : " PSFD-",
-            !(caps & ARCH_CAPS_TSX_CTRL)              ? "" :
-            (opt_tsx & 1)                             ? " TSX+" : " TSX-",
-            !cpu_has_srbds_ctrl                       ? "" :
-@@ -979,10 +994,7 @@ void __init init_speculation_mitigations(void)
-         if ( !has_spec_ctrl )
-             printk(XENLOG_WARNING "?!? CET active, but no MSR_SPEC_CTRL?\n");
-         else if ( opt_ibrs == -1 )
--        {
-             opt_ibrs = ibrs = true;
--            default_xen_spec_ctrl |= SPEC_CTRL_IBRS | SPEC_CTRL_STIBP;
--        }
-
-         if ( opt_thunk == THUNK_DEFAULT || opt_thunk == THUNK_RETPOLINE )
-             thunk = THUNK_JMP;
-@@ -1086,14 +1098,49 @@ void __init init_speculation_mitigations(void)
-             setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM);
-     }
-
--    /* If we have IBRS available, see whether we should use it. */
-+    /* Figure out default_xen_spec_ctrl. */
-     if ( has_spec_ctrl && ibrs )
-+    {
-+        /* IBRS implies STIBP. */
-+        if ( opt_stibp == -1 )
-+            opt_stibp = 1;
-+
-         default_xen_spec_ctrl |= SPEC_CTRL_IBRS;
-+    }
-+
-+    /*
-+     * Use STIBP by default if the hardware hint is set. Otherwise, leave it
-+     * off as it has a severe performance penalty on pre-eIBRS Intel hardware
-+     * where it was retrofitted in microcode.
-+     */
-+    if ( opt_stibp == -1 )
-+        opt_stibp = !!boot_cpu_has(X86_FEATURE_STIBP_ALWAYS);
-+
-+    if ( opt_stibp && (boot_cpu_has(X86_FEATURE_STIBP) ||
-+                       boot_cpu_has(X86_FEATURE_AMD_STIBP)) )
-+        default_xen_spec_ctrl |= SPEC_CTRL_STIBP;
-
--    /* If we have SSBD available, see whether we should use it. */
-     if ( opt_ssbd && (boot_cpu_has(X86_FEATURE_SSBD) ||
-                       boot_cpu_has(X86_FEATURE_AMD_SSBD)) )
-+    {
-+        /* SSBD implies PSFD */
-+        if ( opt_psfd == -1 )
-+            opt_psfd = 1;
-+
-         default_xen_spec_ctrl |= SPEC_CTRL_SSBD;
-+    }
-+
-+    /*
-+     * Don't use PSFD by default. AMD designed the predictor to
-+     * auto-clear on privilege change. PSFD is implied by SSBD, which is
-+     * off by default.
-+     */
-+    if ( opt_psfd == -1 )
-+        opt_psfd = 0;
-+
-+    if ( opt_psfd && (boot_cpu_has(X86_FEATURE_PSFD) ||
-+                      boot_cpu_has(X86_FEATURE_INTEL_PSFD)) )
-+        default_xen_spec_ctrl |= SPEC_CTRL_PSFD;
-
-     /*
-      * PV guests can create RSB entries for any linear address they control,
----
-2.30.2
-
diff --git a/xen.git-eec5b02403a9df2523527caad24f17af5060fbe7.patch b/xen.git-eec5b02403a9df2523527caad24f17af5060fbe7.patch
deleted file mode 100644
index 1394851..0000000
--- a/xen.git-eec5b02403a9df2523527caad24f17af5060fbe7.patch
+++ /dev/null
@@ -1,86 +0,0 @@
-From eec5b02403a9df2523527caad24f17af5060fbe7 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Tue, 12 Jul 2022 11:15:03 +0200
-Subject: [PATCH] xen/cmdline: Extend parse_boolean() to signal a name match
-
-This will help parsing a sub-option which has boolean and non-boolean options
-available.
-
-First, rework 'int val' into 'bool has_neg_prefix'. This inverts its value,
-but the resulting logic is far easier to follow.
-
-Second, reject anything of the form 'no-$FOO=' which excludes ambiguous
-constructs such as 'no-$foo=yes' which have never been valid.
-
-This just leaves the case where everything is otherwise fine, but parse_bool()
-can't interpret the provided string.
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Juergen Gross
-Reviewed-by: Jan Beulich
-master commit: 382326cac528dd1eb0d04efd5c05363c453e29f4
-master date: 2022-07-11 15:21:35 +0100
----
- xen/common/kernel.c   | 20 ++++++++++++++++----
- xen/include/xen/lib.h |  3 ++-
- 2 files changed, 18 insertions(+), 5 deletions(-)
-
-diff --git a/xen/common/kernel.c b/xen/common/kernel.c
-index e119e5401f..7ed96521f9 100644
---- a/xen/common/kernel.c
-+++ b/xen/common/kernel.c
-@@ -272,9 +272,9 @@ int parse_bool(const char *s, const char *e)
- int parse_boolean(const char *name, const char *s, const char *e)
- {
-     size_t slen, nlen;
--    int val = !!strncmp(s, "no-", 3);
-+    bool has_neg_prefix = !strncmp(s, "no-", 3);
-
--    if ( !val )
-+    if ( has_neg_prefix )
-         s += 3;
-
-     slen = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s);
-@@ -286,11 +286,23 @@ int parse_boolean(const char *name, const char *s, const char *e)
-
-     /* Exact, unadorned name? Result depends on the 'no-' prefix. */
-     if ( slen == nlen )
--        return val;
-+        return !has_neg_prefix;
-+
-+    /* Inexact match with a 'no-' prefix? Not valid. */
-+    if ( has_neg_prefix )
-+        return -1;
-
-     /* =$SOMETHING? Defer to the regular boolean parsing. */
-     if ( s[nlen] == '=' )
--        return parse_bool(&s[nlen + 1], e);
-+    {
-+        int b = parse_bool(&s[nlen + 1], e);
-+
-+        if ( b >= 0 )
-+            return b;
-+
-+        /* Not a boolean, but the name matched. Signal specially. */
-+        return -2;
-+    }
-
-     /* Unrecognised. Give up. */
-     return -1;
-diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h
-index c6987973bf..2296044caf 100644
---- a/xen/include/xen/lib.h
-+++ b/xen/include/xen/lib.h
-@@ -80,7 +80,8 @@ int parse_bool(const char *s, const char *e);
- /**
-  * Given a specific name, parses a string of the form:
-  *   [no-]$NAME[=...]
-- * returning 0 or 1 for a recognised boolean, or -1 for an error.
-+ * returning 0 or 1 for a recognised boolean. Returns -1 for general errors,
-+ * and -2 for "not a boolean, but $NAME= matches".
- */
- int parse_boolean(const char *name, const char *s, const char *e);
-
---
-2.30.2
-
diff --git a/xen.git-f066c8bb3e5686141cef6fa1dc86ea9f37c5388a.patch b/xen.git-f066c8bb3e5686141cef6fa1dc86ea9f37c5388a.patch
deleted file mode 100644
index 56cdada..0000000
--- a/xen.git-f066c8bb3e5686141cef6fa1dc86ea9f37c5388a.patch
+++ /dev/null
@@ -1,137 +0,0 @@
-From f066c8bb3e5686141cef6fa1dc86ea9f37c5388a Mon Sep 17 00:00:00 2001
-From: Andrew Cooper
-Date: Tue, 12 Jul 2022 11:15:37 +0200
-Subject: [PATCH] x86/spec-ctrl: Add fine-grained cmdline suboptions for
- primitives
-
-Support controlling the PV/HVM suboption of msr-sc/rsb/md-clear, which
-previously wasn't possible.
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-master commit: 27357c394ba6e1571a89105b840ce1c6f026485c
-master date: 2022-07-11 15:21:35 +0100
----
- docs/misc/xen-command-line.pandoc | 12 ++++--
- xen/arch/x86/spec_ctrl.c          | 66 ++++++++++++++++++++++++++-----
- 2 files changed, 66 insertions(+), 12 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index 46e9c58d35..1bbdb55129 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2233,7 +2233,8 @@ not be able to control the state of the mitigation.
- By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
-
- ### spec-ctrl (x86)
--> `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>,
-+> `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>,
-+>   {msr-sc,rsb,md-clear}=<bool>|{pv,hvm}=<bool>,
- > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
- > eager-fpu,l1d-flush,branch-harden,srb-lock,
- > unpriv-mmio}=<bool> ]`
-@@ -2258,12 +2259,17 @@ in place for guests to use.
-
- Use of a positive boolean value for either of these options is invalid.
-
--The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` offer fine
-+The `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` options offer fine
- grained control over the primitives by Xen. These impact Xen's ability to
--protect itself, and Xen's ability to virtualise support for guests to use.
-+protect itself, and/or Xen's ability to virtualise support for guests to use.
-
- * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests
-   respectively.
-+* Each other option can be used either as a plain boolean
-+  (e.g. `spec-ctrl=rsb` to control both the PV and HVM sub-options), or with
-+  `pv=` or `hvm=` subsuboptions (e.g. `spec-ctrl=rsb=no-hvm` to disable HVM
-+  RSB only).
-+
- * `msr-sc=` offers control over Xen's support for manipulating `MSR_SPEC_CTRL`
-   on entry and exit. These blocks are necessary to virtualise support for
-   guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc.
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index ae74943c10..9507e5da60 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -147,20 +147,68 @@ static int __init parse_spec_ctrl(const char *s) - opt_rsb_hvm = val; - opt_md_clear_hvm = val; - } -- else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 ) -+ else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) - { -- opt_msr_sc_pv = val; -- opt_msr_sc_hvm = val; -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_msr_sc_pv = opt_msr_sc_hvm = val; -+ break; -+ -+ case -2: -+ s += strlen("msr-sc="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_msr_sc_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_msr_sc_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } - } -- else if ( (val = parse_boolean("rsb", s, ss)) >= 0 ) -+ else if ( (val = parse_boolean("rsb", s, ss)) != -1 ) - { -- opt_rsb_pv = val; -- opt_rsb_hvm = val; -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_rsb_pv = opt_rsb_hvm = val; -+ break; -+ -+ case -2: -+ s += strlen("rsb="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_rsb_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_rsb_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } - } -- else if ( (val = parse_boolean("md-clear", s, ss)) >= 0 ) -+ else if ( (val = parse_boolean("md-clear", s, ss)) != -1 ) - { -- opt_md_clear_pv = val; -- opt_md_clear_hvm = val; -+ switch ( val ) -+ { -+ case 0: -+ case 1: -+ opt_md_clear_pv = opt_md_clear_hvm = val; -+ break; -+ -+ case -2: -+ s += strlen("md-clear="); -+ if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -+ opt_md_clear_pv = val; -+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -+ opt_md_clear_hvm = val; -+ else -+ default: -+ rc = -EINVAL; -+ break; -+ } - } - - /* Xen's speculative sidechannel mitigation settings. */ --- -2.30.2 - diff --git a/xen.spec b/xen.spec index 5f6be71..db5b96d 100644 --- a/xen.spec +++ b/xen.spec @@ -54,8 +54,8 @@ Summary: Xen is a virtual machine monitor Name: xen -Version: 4.16.1 -Release: 8%{?dist} +Version: 4.16.2 +Release: 1%{?dist} License: GPLv2+ and LGPLv2+ and BSD URL: http://xen.org/ Source0: https://downloads.xenproject.org/release/xen/%{version}/xen-%{version}.tar.gz @@ -111,31 +111,6 @@ Patch43: xen.gcc11.fixes.patch Patch44: xsa376.patch Patch45: xen.gcc12.fixes.patch Patch46: xen.efi.build.patch -Patch47: xsa401-4.16-1.patch -Patch48: xsa401-4.16-2.patch -Patch49: xsa402-4.16-1.patch -Patch50: xsa402-4.16-2.patch -Patch51: xen.git-b378ee56c7e0bb5eeb35dcc55b3d29e5f50eb566.patch -Patch52: xsa402-4.16-3.patch -Patch53: xsa402-4.16-4.patch -Patch54: xsa402-4.16-5.patch -Patch55: xsa404-4.16-1.patch -Patch56: xsa404-4.16-2.patch -Patch57: xsa404-4.16-3.patch -Patch58: xsa403-4.16-1.patch -Patch59: xen.git-09d533f4c80b7eaf9fb4e36ebba8259580857a9d.patch -Patch60: xen.git-db6ca8176ccc4ff7dfe3c06969af9ebfab0d7b04.patch -Patch61: xen.git-eec5b02403a9df2523527caad24f17af5060fbe7.patch -Patch62: xen.git-f066c8bb3e5686141cef6fa1dc86ea9f37c5388a.patch -Patch63: xsa407-4.16-1.patch -Patch64: xsa407-4.16-2.patch -Patch65: xsa407-4.16-3.patch -Patch66: xsa407-4.16-4.patch -Patch67: xsa407-4.16-5.patch -Patch68: xsa407-4.16-6.patch -Patch69: xsa407-4.16-7.patch -Patch70: xsa407-4.16-8.patch -Patch71: xsa408.patch %if %build_qemutrad @@ -348,31 +323,6 @@ manage Xen virtual machines. 
 %patch44 -p1
 %patch45 -p1
 %patch46 -p1
-%patch47 -p1
-%patch48 -p1
-%patch49 -p1
-%patch50 -p1
-%patch51 -p1
-%patch52 -p1
-%patch53 -p1
-%patch54 -p1
-%patch55 -p1
-%patch56 -p1
-%patch57 -p1
-%patch58 -p1
-%patch59 -p1
-%patch60 -p1
-%patch61 -p1
-%patch62 -p1
-%patch63 -p1
-%patch64 -p1
-%patch65 -p1
-%patch66 -p1
-%patch67 -p1
-%patch68 -p1
-%patch69 -p1
-%patch70 -p1
-%patch71 -p1

 # qemu-xen-traditional patches
 pushd tools/qemu-xen-traditional
@@ -988,6 +938,10 @@ fi
 %endif

 %changelog
+* Sat Sep 17 2022 Michael Young - 4.16.2-1
+- update to xen-4.16.2
+  remove or adjust patches now included or superseded upstream
+
 * Tue Jul 26 2022 Michael Young - 4.16.1-8
 - insufficient TLB flush for x86 PV guests in shadow mode
   [XSA-408, CVE-2022-33745] (#2112223)
diff --git a/xsa401-4.16-1.patch b/xsa401-4.16-1.patch
deleted file mode 100644
index 5c8c506..0000000
--- a/xsa401-4.16-1.patch
+++ /dev/null
@@ -1,170 +0,0 @@
-From: Andrew Cooper
-Subject: x86/pv: Clean up _get_page_type()
-
-Various fixes for clarity, ahead of making complicated changes.
-
- * Split the overflow check out of the if/else chain for type handling, as
-   it's somewhat unrelated.
- * Comment the main if/else chain to explain what is going on. Adjust one
-   ASSERT() and state the bit layout for validate-locked and partial states.
- * Correct the comment about TLB flushing, as it's backwards. The problem
-   case is when writeable mappings are retained to a page becoming read-only,
-   as it allows the guest to bypass Xen's safety checks for updates.
- * Reduce the scope of 'y'. It is an artefact of the cmpxchg loop and not
-   valid for use by subsequent logic. Switch to using ACCESS_ONCE() to treat
-   all reads as explicitly volatile. The only thing preventing the validated
-   wait-loop being infinite is the compiler barrier hidden in cpu_relax().
- * Replace one page_get_owner(page) with the already-calculated 'd' already in
-   scope.
-
-No functional change.
-
-This is part of XSA-401 / CVE-2022-26362.
-
-Signed-off-by: Andrew Cooper
-Signed-off-by: George Dunlap
-Reviewed-by: Jan Beulich
-Reviewed-by: George Dunlap
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index 796faca64103..ddd32f88c798 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -2935,16 +2935,17 @@ static int _put_page_type(struct page_info *page, unsigned int flags,
- static int _get_page_type(struct page_info *page, unsigned long type,
-                           bool preemptible)
- {
--    unsigned long nx, x, y = page->u.inuse.type_info;
-+    unsigned long nx, x;
-     int rc = 0;
-
-     ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
-     ASSERT(!in_irq());
-
--    for ( ; ; )
-+    for ( unsigned long y = ACCESS_ONCE(page->u.inuse.type_info); ; )
-     {
-         x  = y;
-         nx = x + 1;
-+
-         if ( unlikely((nx & PGT_count_mask) == 0) )
-         {
-             gdprintk(XENLOG_WARNING,
-@@ -2952,8 +2953,15 @@ static int _get_page_type(struct page_info *page, unsigned long type,
-                      mfn_x(page_to_mfn(page)));
-             return -EINVAL;
-         }
--        else if ( unlikely((x & PGT_count_mask) == 0) )
-+
-+        if ( unlikely((x & PGT_count_mask) == 0) )
-         {
-+            /*
-+             * Typeref 0 -> 1.
-+             *
-+             * Type changes are permitted when the typeref is 0. If the type
-+             * actually changes, the page needs re-validating.
-+             */
-             struct domain *d = page_get_owner(page);
-
-             if ( d && shadow_mode_enabled(d) )
-@@ -2964,8 +2972,8 @@ static int _get_page_type(struct page_info *page, unsigned long type,
-             {
-                 /*
-                  * On type change we check to flush stale TLB entries. It is
--                 * vital that no other CPUs are left with mappings of a frame
--                 * which is about to become writeable to the guest.
-+                 * vital that no other CPUs are left with writeable mappings
-+                 * to a frame which is intending to become pgtable/segdesc.
-                  */
-                 cpumask_t *mask = this_cpu(scratch_cpumask);
-
-@@ -2977,7 +2985,7 @@ static int _get_page_type(struct page_info *page, unsigned long type,
-
-                 if ( unlikely(!cpumask_empty(mask)) &&
-                      /* Shadow mode: track only writable pages. */
--                     (!shadow_mode_enabled(page_get_owner(page)) ||
-+                     (!shadow_mode_enabled(d) ||
-                       ((nx & PGT_type_mask) == PGT_writable_page)) )
-                 {
-                     perfc_incr(need_flush_tlb_flush);
-@@ -3008,7 +3016,14 @@ static int _get_page_type(struct page_info *page, unsigned long type,
-         }
-         else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
-         {
--            /* Don't log failure if it could be a recursive-mapping attempt. */
-+            /*
-+             * else, we're trying to take a new reference, of the wrong type.
-+             *
-+             * This (being able to prohibit use of the wrong type) is what the
-+             * typeref system exists for, but skip printing the failure if it
-+             * looks like a recursive mapping, as subsequent logic might
-+             * ultimately permit the attempt.
-+             */
-             if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
-                  (type == PGT_l1_page_table) )
-                 return -EINVAL;
-@@ -3027,18 +3042,46 @@ static int _get_page_type(struct page_info *page, unsigned long type,
-         }
-         else if ( unlikely(!(x & PGT_validated)) )
-         {
-+            /*
-+             * else, the count is non-zero, and we're grabbing the right type;
-+             * but the page hasn't been validated yet.
-+             *
-+             * The page is in one of two states (depending on PGT_partial),
-+             * and should have exactly one reference.
-+             */
-+            ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1));
-+
-             if ( !(x & PGT_partial) )
-             {
--                /* Someone else is updating validation of this page. Wait... */
-+                /*
-+                 * The page has been left in the "validate locked" state
-+                 * (i.e. PGT_[type] | 1) which means that a concurrent caller
-+                 * of _get_page_type() is in the middle of validation.
-+                 *
-+                 * Spin waiting for the concurrent user to complete (partial
-+                 * or fully validated), then restart our attempt to acquire a
-+                 * type reference.
-+                 */
-                 do {
-                     if ( preemptible && hypercall_preempt_check() )
-                         return -EINTR;
-                     cpu_relax();
--                } while ( (y = page->u.inuse.type_info) == x );
-+                } while ( (y = ACCESS_ONCE(page->u.inuse.type_info)) == x );
-                 continue;
-             }
--            /* Type ref count was left at 1 when PGT_partial got set. */
--            ASSERT((x & PGT_count_mask) == 1);
-+
-+            /*
-+             * The page has been left in the "partial" state
-+             * (i.e., PGT_[type] | PGT_partial | 1).
-+             *
-+             * Rather than bumping the type count, we need to try to grab the
-+             * validation lock; if we succeed, we need to validate the page,
-+             * then drop the general ref associated with the PGT_partial bit.
-+             *
-+             * We grab the validation lock by setting nx to (PGT_[type] | 1)
-+             * (i.e., non-zero type count, neither PGT_validated nor
-+             * PGT_partial set).
-+             */
-             nx = x & ~PGT_partial;
-         }
-
-@@ -3087,6 +3130,13 @@ static int _get_page_type(struct page_info *page, unsigned long type,
-     }
-
-  out:
-+    /*
-+     * Did we drop the PGT_partial bit when acquiring the typeref? If so,
-+     * drop the general reference that went along with it.
-+     *
-+     * N.B. validate_page() may have re-set PGT_partial, not reflected in
-+     * nx, but will have taken an extra ref when doing so.
-+     */
-     if ( (x & PGT_partial) && !(nx & PGT_partial) )
-         put_page(page);
-
diff --git a/xsa401-4.16-2.patch b/xsa401-4.16-2.patch
deleted file mode 100644
index be58db5..0000000
--- a/xsa401-4.16-2.patch
+++ /dev/null
@@ -1,191 +0,0 @@
-From: Andrew Cooper
-Subject: x86/pv: Fix ABAC cmpxchg() race in _get_page_type()
-
-_get_page_type() suffers from a race condition where it incorrectly assumes
-that because 'x' was read and a subsequent cmpxchg() succeeds, the type
-cannot have changed in-between. Consider:
-
-CPU A:
-  1. Creates an L2e referencing pg
-     `-> _get_page_type(pg, PGT_l1_page_table), sees count 0, type PGT_writable_page
-  2. Issues flush_tlb_mask()
-CPU B:
-  3. Creates a writeable mapping of pg
-     `-> _get_page_type(pg, PGT_writable_page), count increases to 1
-  4. Writes into new mapping, creating a TLB entry for pg
-  5. Removes the writeable mapping of pg
-     `-> _put_page_type(pg), count goes back down to 0
-CPU A:
-  6. Issues cmpxchg(), setting count 1, type PGT_l1_page_table
-
-CPU B now has a writeable mapping to pg, which Xen believes is a pagetable and
-suitably protected (i.e. read-only). The TLB flush in step 2 must be deferred
-until after the guest is prohibited from creating new writeable mappings,
-which is after step 6.
-
-Defer all safety actions until after the cmpxchg() has successfully taken the
-intended typeref, because that is what prevents concurrent users from using
-the old type.
-
-Also remove the early validation for writeable and shared pages. This removes
-race conditions where one half of a parallel mapping attempt can return
-successfully before:
- * The IOMMU pagetables are in sync with the new page type
- * Writeable mappings to shared pages have been torn down
-
-This is part of XSA-401 / CVE-2022-26362.
-
-Reported-by: Jann Horn
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-Reviewed-by: George Dunlap
-
-diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
-index ddd32f88c798..1693b580b152 100644
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -2962,56 +2962,12 @@ static int _get_page_type(struct page_info *page, unsigned long type,
-              * Type changes are permitted when the typeref is 0. If the type
-              * actually changes, the page needs re-validating.
-              */
--            struct domain *d = page_get_owner(page);
--
--            if ( d && shadow_mode_enabled(d) )
--                shadow_prepare_page_type_change(d, page, type);
-
-             ASSERT(!(x & PGT_pae_xen_l2));
-             if ( (x & PGT_type_mask) != type )
-             {
--                /*
--                 * On type change we check to flush stale TLB entries. It is
--                 * vital that no other CPUs are left with writeable mappings
--                 * to a frame which is intending to become pgtable/segdesc.
--                 */
--                cpumask_t *mask = this_cpu(scratch_cpumask);
--
--                BUG_ON(in_irq());
--                cpumask_copy(mask, d->dirty_cpumask);
--
--                /* Don't flush if the timestamp is old enough */
--                tlbflush_filter(mask, page->tlbflush_timestamp);
--
--                if ( unlikely(!cpumask_empty(mask)) &&
--                     /* Shadow mode: track only writable pages. */
--                     (!shadow_mode_enabled(d) ||
--                      ((nx & PGT_type_mask) == PGT_writable_page)) )
--                {
--                    perfc_incr(need_flush_tlb_flush);
--                    /*
--                     * If page was a page table make sure the flush is
--                     * performed using an IPI in order to avoid changing the
--                     * type of a page table page under the feet of
--                     * spurious_page_fault().
--                     */
--                    flush_mask(mask,
--                               (x & PGT_type_mask) &&
--                               (x & PGT_type_mask) <= PGT_root_page_table
--                               ? FLUSH_TLB | FLUSH_FORCE_IPI
--                               : FLUSH_TLB);
--                }
--
--                /* We lose existing type and validity.
*/ - nx &= ~(PGT_type_mask | PGT_validated); - nx |= type; -- -- /* -- * No special validation needed for writable pages. -- * Page tables and GDT/LDT need to be scanned for validity. -- */ -- if ( type == PGT_writable_page || type == PGT_shared_page ) -- nx |= PGT_validated; - } - } - else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) -@@ -3092,6 +3048,56 @@ static int _get_page_type(struct page_info *page, unsigned long type, - return -EINTR; - } - -+ /* -+ * One typeref has been taken and is now globally visible. -+ * -+ * The page is either in the "validate locked" state (PGT_[type] | 1) or -+ * fully validated (PGT_[type] | PGT_validated | >0). -+ */ -+ -+ if ( unlikely((x & PGT_count_mask) == 0) ) -+ { -+ struct domain *d = page_get_owner(page); -+ -+ if ( d && shadow_mode_enabled(d) ) -+ shadow_prepare_page_type_change(d, page, type); -+ -+ if ( (x & PGT_type_mask) != type ) -+ { -+ /* -+ * On type change we check to flush stale TLB entries. It is -+ * vital that no other CPUs are left with writeable mappings -+ * to a frame which is intending to become pgtable/segdesc. -+ */ -+ cpumask_t *mask = this_cpu(scratch_cpumask); -+ -+ BUG_ON(in_irq()); -+ cpumask_copy(mask, d->dirty_cpumask); -+ -+ /* Don't flush if the timestamp is old enough */ -+ tlbflush_filter(mask, page->tlbflush_timestamp); -+ -+ if ( unlikely(!cpumask_empty(mask)) && -+ /* Shadow mode: track only writable pages. */ -+ (!shadow_mode_enabled(d) || -+ ((nx & PGT_type_mask) == PGT_writable_page)) ) -+ { -+ perfc_incr(need_flush_tlb_flush); -+ /* -+ * If page was a page table make sure the flush is -+ * performed using an IPI in order to avoid changing the -+ * type of a page table page under the feet of -+ * spurious_page_fault(). -+ */ -+ flush_mask(mask, -+ (x & PGT_type_mask) && -+ (x & PGT_type_mask) <= PGT_root_page_table -+ ? FLUSH_TLB | FLUSH_FORCE_IPI -+ : FLUSH_TLB); -+ } -+ } -+ } -+ - if ( unlikely(((x & PGT_type_mask) == PGT_writable_page) != - (type == PGT_writable_page)) ) - { -@@ -3120,13 +3126,25 @@ static int _get_page_type(struct page_info *page, unsigned long type, - - if ( unlikely(!(nx & PGT_validated)) ) - { -- if ( !(x & PGT_partial) ) -+ /* -+ * No special validation needed for writable or shared pages. Page -+ * tables and GDT/LDT need to have their contents audited. -+ * -+ * per validate_page(), non-atomic updates are fine here. -+ */ -+ if ( type == PGT_writable_page || type == PGT_shared_page ) -+ page->u.inuse.type_info |= PGT_validated; -+ else - { -- page->nr_validated_ptes = 0; -- page->partial_flags = 0; -- page->linear_pt_count = 0; -+ if ( !(x & PGT_partial) ) -+ { -+ page->nr_validated_ptes = 0; -+ page->partial_flags = 0; -+ page->linear_pt_count = 0; -+ } -+ -+ rc = validate_page(page, type, preemptible); - } -- rc = validate_page(page, type, preemptible); - } - - out: diff --git a/xsa402-4.16-1.patch b/xsa402-4.16-1.patch deleted file mode 100644 index b783383..0000000 --- a/xsa402-4.16-1.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Andrew Cooper -Subject: x86/page: Introduce _PAGE_* constants for memory types - -... rather than opencoding the PAT/PCD/PWT attributes in __PAGE_HYPERVISOR_* -constants. These are going to be needed by forthcoming logic. - -No functional change. - -This is part of XSA-402. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h -index 1d080cffbe84..2e542050f65a 100644 ---- a/xen/include/asm-x86/page.h -+++ b/xen/include/asm-x86/page.h -@@ -331,6 +331,14 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); - - #define PAGE_CACHE_ATTRS (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) - -+/* Memory types, encoded under Xen's choice of MSR_PAT. */ -+#define _PAGE_WB ( 0) -+#define _PAGE_WT ( _PAGE_PWT) -+#define _PAGE_UCM ( _PAGE_PCD ) -+#define _PAGE_UC ( _PAGE_PCD | _PAGE_PWT) -+#define _PAGE_WC (_PAGE_PAT ) -+#define _PAGE_WP (_PAGE_PAT | _PAGE_PWT) -+ - /* - * Debug option: Ensure that granted mappings are not implicitly unmapped. - * WARNING: This will need to be disabled to run OSes that use the spare PTE -@@ -349,8 +357,8 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); - #define __PAGE_HYPERVISOR_RX (_PAGE_PRESENT | _PAGE_ACCESSED) - #define __PAGE_HYPERVISOR (__PAGE_HYPERVISOR_RX | \ - _PAGE_DIRTY | _PAGE_RW) --#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_PCD) --#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_PCD | _PAGE_PWT) -+#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_UCM) -+#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_UC) - #define __PAGE_HYPERVISOR_SHSTK (__PAGE_HYPERVISOR_RO | _PAGE_DIRTY) - - #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages mappings */ diff --git a/xsa402-4.16-2.patch b/xsa402-4.16-2.patch deleted file mode 100644 index ebb2f5e..0000000 --- a/xsa402-4.16-2.patch +++ /dev/null @@ -1,213 +0,0 @@ -From: Andrew Cooper -Subject: x86: Don't change the cacheability of the directmap - -Changeset 55f97f49b7ce ("x86: Change cache attributes of Xen 1:1 page mappings -in response to guest mapping requests") attempted to keep the cacheability -consistent between different mappings of the same page. - -The reason wasn't described in the changelog, but it is understood to be in -regards to a concern over machine check exceptions, owing to errata when using -mixed cacheabilities. It did this primarily by updating Xen's mapping of the -page in the direct map when the guest mapped a page with reduced cacheability. - -Unfortunately, the logic didn't actually prevent mixed cacheability from -occurring: - * A guest could map a page normally, and then map the same page with - different cacheability; nothing prevented this. - * The cacheability of the directmap was always latest-takes-precedence in - terms of guest requests. - * Grant-mapped frames with lesser cacheability didn't adjust the page's - cacheattr settings. - * The map_domain_page() function still unconditionally created WB mappings, - irrespective of the page's cacheattr settings. - -Additionally, update_xen_mappings() had a bug where the alias calculation was -wrong for mfn's which were .init content, which should have been treated as -fully guest pages, not Xen pages. - -Worse yet, the logic introduced a vulnerability whereby necessary -pagetable/segdesc adjustments made by Xen in the validation logic could become -non-coherent between the cache and main memory. The CPU could subsequently -operate on the stale value in the cache, rather than the safe value in main -memory. - -The directmap contains primarily mappings of RAM. PAT/MTRR conflict -resolution is asymmetric, and generally for MTRR=WB ranges, PAT of lesser -cacheability resolves to being coherent. 
The special case is WC mappings, -which are non-coherent against MTRR=WB regions (except for fully-coherent -CPUs). - -Xen must not have any WC cacheability in the directmap, to prevent Xen's -actions from creating non-coherency. (Guest actions creating non-coherency is -dealt with in subsequent patches.) As all memory types for MTRR=WB ranges -inter-operate coherently, so leave Xen's directmap mappings as WB. - -Only PV guests with access to devices can use reduced-cacheability mappings to -begin with, and they're trusted not to mount DoSs against the system anyway. - -Drop PGC_cacheattr_{base,mask} entirely, and the logic to manipulate them. -Shift the later PGC_* constants up, to gain 3 extra bits in the main reference -count. Retain the check in get_page_from_l1e() for special_pages() because a -guest has no business using reduced cacheability on these. - -This reverts changeset 55f97f49b7ce6c3520c555d19caac6cf3f9a5df0 - -This is CVE-2022-26363, part of XSA-402. - -Signed-off-by: Andrew Cooper -Reviewed-by: George Dunlap - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index c6429b0f749a..ab32d13a1a0d 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -783,28 +783,6 @@ bool is_iomem_page(mfn_t mfn) - return (page_get_owner(page) == dom_io); - } - --static int update_xen_mappings(unsigned long mfn, unsigned int cacheattr) --{ -- int err = 0; -- bool alias = mfn >= PFN_DOWN(xen_phys_start) && -- mfn < PFN_UP(xen_phys_start + xen_virt_end - XEN_VIRT_START); -- unsigned long xen_va = -- XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT); -- -- if ( boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) ) -- return 0; -- -- if ( unlikely(alias) && cacheattr ) -- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, 0); -- if ( !err ) -- err = map_pages_to_xen((unsigned long)mfn_to_virt(mfn), _mfn(mfn), 1, -- PAGE_HYPERVISOR | cacheattr_to_pte_flags(cacheattr)); -- if ( unlikely(alias) && !cacheattr && !err ) -- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, PAGE_HYPERVISOR); -- -- return err; --} -- - #ifndef NDEBUG - struct mmio_emul_range_ctxt { - const struct domain *d; -@@ -1009,47 +987,14 @@ get_page_from_l1e( - goto could_not_pin; - } - -- if ( pte_flags_to_cacheattr(l1f) != -- ((page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base) ) -+ if ( (l1f & PAGE_CACHE_ATTRS) != _PAGE_WB && is_special_page(page) ) - { -- unsigned long x, nx, y = page->count_info; -- unsigned long cacheattr = pte_flags_to_cacheattr(l1f); -- int err; -- -- if ( is_special_page(page) ) -- { -- if ( write ) -- put_page_type(page); -- put_page(page); -- gdprintk(XENLOG_WARNING, -- "Attempt to change cache attributes of Xen heap page\n"); -- return -EACCES; -- } -- -- do { -- x = y; -- nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base); -- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); -- -- err = update_xen_mappings(mfn, cacheattr); -- if ( unlikely(err) ) -- { -- cacheattr = y & PGC_cacheattr_mask; -- do { -- x = y; -- nx = (x & ~PGC_cacheattr_mask) | cacheattr; -- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); -- -- if ( write ) -- put_page_type(page); -- put_page(page); -- -- gdprintk(XENLOG_WARNING, "Error updating mappings for mfn %" PRI_mfn -- " (pfn %" PRI_pfn ", from L1 entry %" PRIpte ") for d%d\n", -- mfn, get_gpfn_from_mfn(mfn), -- l1e_get_intpte(l1e), l1e_owner->domain_id); -- return err; -- } -+ if ( write ) -+ put_page_type(page); -+ put_page(page); -+ gdprintk(XENLOG_WARNING, -+ "Attempt to change cache attributes of Xen heap page\n"); -+ return 
-EACCES; - } - - return 0; -@@ -2467,25 +2412,10 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, - */ - static int cleanup_page_mappings(struct page_info *page) - { -- unsigned int cacheattr = -- (page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base; - int rc = 0; - unsigned long mfn = mfn_x(page_to_mfn(page)); - - /* -- * If we've modified xen mappings as a result of guest cache -- * attributes, restore them to the "normal" state. -- */ -- if ( unlikely(cacheattr) ) -- { -- page->count_info &= ~PGC_cacheattr_mask; -- -- BUG_ON(is_special_page(page)); -- -- rc = update_xen_mappings(mfn, 0); -- } -- -- /* - * If this may be in a PV domain's IOMMU, remove it. - * - * NB that writable xenheap pages have their type set and cleared by -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index cb9052749963..8a9a43bb0a9d 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -69,25 +69,22 @@ - /* Set when is using a page as a page table */ - #define _PGC_page_table PG_shift(3) - #define PGC_page_table PG_mask(1, 3) -- /* 3-bit PAT/PCD/PWT cache-attribute hint. */ --#define PGC_cacheattr_base PG_shift(6) --#define PGC_cacheattr_mask PG_mask(7, 6) - /* Page is broken? */ --#define _PGC_broken PG_shift(7) --#define PGC_broken PG_mask(1, 7) -+#define _PGC_broken PG_shift(4) -+#define PGC_broken PG_mask(1, 4) - /* Mutually-exclusive page states: { inuse, offlining, offlined, free }. */ --#define PGC_state PG_mask(3, 9) --#define PGC_state_inuse PG_mask(0, 9) --#define PGC_state_offlining PG_mask(1, 9) --#define PGC_state_offlined PG_mask(2, 9) --#define PGC_state_free PG_mask(3, 9) -+#define PGC_state PG_mask(3, 6) -+#define PGC_state_inuse PG_mask(0, 6) -+#define PGC_state_offlining PG_mask(1, 6) -+#define PGC_state_offlined PG_mask(2, 6) -+#define PGC_state_free PG_mask(3, 6) - #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st) - /* Page is not reference counted (see below for caveats) */ --#define _PGC_extra PG_shift(10) --#define PGC_extra PG_mask(1, 10) -+#define _PGC_extra PG_shift(7) -+#define PGC_extra PG_mask(1, 7) - - /* Count of references to this frame. */ --#define PGC_count_width PG_shift(10) -+#define PGC_count_width PG_shift(7) - #define PGC_count_mask ((1UL< -Subject: x86: Split cache_flush() out of cache_writeback() - -Subsequent changes will want a fully flushing version. - -Use the new helper rather than opencoding it in flush_area_local(). This -resolves an outstanding issue where the conditional sfence is on the wrong -side of the clflushopt loop. clflushopt is ordered with respect to older -stores, not to younger stores. - -Rename gnttab_cache_flush()'s helper to avoid colliding in name. -grant_table.c can see the prototype from cache.h so the build fails -otherwise. - -This is part of XSA-402. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -Xen 4.16 and earlier: - * Also backport half of c/s 3330013e67396 "VT-d / x86: re-arrange cache - syncing" to split cache_writeback() out of the IOMMU logic, but without the - associated hooks changes. 
- -diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c -index 25798df50f54..0c912b8669f8 100644 ---- a/xen/arch/x86/flushtlb.c -+++ b/xen/arch/x86/flushtlb.c -@@ -234,7 +234,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - if ( flags & FLUSH_CACHE ) - { - const struct cpuinfo_x86 *c = ¤t_cpu_data; -- unsigned long i, sz = 0; -+ unsigned long sz = 0; - - if ( order < (BITS_PER_LONG - PAGE_SHIFT) ) - sz = 1UL << (order + PAGE_SHIFT); -@@ -244,13 +244,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - c->x86_clflush_size && c->x86_cache_size && sz && - ((sz >> 10) < c->x86_cache_size) ) - { -- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -- for ( i = 0; i < sz; i += c->x86_clflush_size ) -- alternative_input(".byte " __stringify(NOP_DS_PREFIX) ";" -- " clflush %0", -- "data16 clflush %0", /* clflushopt */ -- X86_FEATURE_CLFLUSHOPT, -- "m" (((const char *)va)[i])); -+ cache_flush(va, sz); - flags &= ~FLUSH_CACHE; - } - else -@@ -265,6 +259,80 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - return flags; - } - -+void cache_flush(const void *addr, unsigned int size) -+{ -+ /* -+ * This function may be called before current_cpu_data is established. -+ * Hence a fallback is needed to prevent the loop below becoming infinite. -+ */ -+ unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16; -+ const void *end = addr + size; -+ -+ addr -= (unsigned long)addr & (clflush_size - 1); -+ for ( ; addr < end; addr += clflush_size ) -+ { -+ /* -+ * Note regarding the "ds" prefix use: it's faster to do a clflush -+ * + prefix than a clflush + nop, and hence the prefix is added instead -+ * of letting the alternative framework fill the gap by appending nops. -+ */ -+ alternative_io("ds; clflush %[p]", -+ "data16 clflush %[p]", /* clflushopt */ -+ X86_FEATURE_CLFLUSHOPT, -+ /* no outputs */, -+ [p] "m" (*(const char *)(addr))); -+ } -+ -+ alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -+} -+ -+void cache_writeback(const void *addr, unsigned int size) -+{ -+ unsigned int clflush_size; -+ const void *end = addr + size; -+ -+ /* Fall back to CLFLUSH{,OPT} when CLWB isn't available. */ -+ if ( !boot_cpu_has(X86_FEATURE_CLWB) ) -+ return cache_flush(addr, size); -+ -+ /* -+ * This function may be called before current_cpu_data is established. -+ * Hence a fallback is needed to prevent the loop below becoming infinite. -+ */ -+ clflush_size = current_cpu_data.x86_clflush_size ?: 16; -+ addr -= (unsigned long)addr & (clflush_size - 1); -+ for ( ; addr < end; addr += clflush_size ) -+ { -+/* -+ * The arguments to a macro must not include preprocessor directives. Doing so -+ * results in undefined behavior, so we have to create some defines here in -+ * order to avoid it. 
-+ */ -+#if defined(HAVE_AS_CLWB) -+# define CLWB_ENCODING "clwb %[p]" -+#elif defined(HAVE_AS_XSAVEOPT) -+# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */ -+#else -+# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */ -+#endif -+ -+#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr)) -+#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT) -+# define INPUT BASE_INPUT -+#else -+# define INPUT(addr) "a" (addr), BASE_INPUT(addr) -+#endif -+ -+ asm volatile (CLWB_ENCODING :: INPUT(addr)); -+ -+#undef INPUT -+#undef BASE_INPUT -+#undef CLWB_ENCODING -+ } -+ -+ asm volatile ("sfence" ::: "memory"); -+} -+ - unsigned int guest_flush_tlb_flags(const struct domain *d) - { - bool shadow = paging_mode_shadow(d); -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index 66f8ce71741c..4c742cd8fe81 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -3431,7 +3431,7 @@ gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop, - return 0; - } - --static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) -+static int _cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) - { - struct domain *d, *owner; - struct page_info *page; -@@ -3525,7 +3525,7 @@ gnttab_cache_flush(XEN_GUEST_HANDLE_PARAM(gnttab_cache_flush_t) uop, - return -EFAULT; - for ( ; ; ) - { -- int ret = cache_flush(&op, cur_ref); -+ int ret = _cache_flush(&op, cur_ref); - - if ( ret < 0 ) - return ret; -diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h -index 01e010a10d61..401079299725 100644 ---- a/xen/drivers/passthrough/vtd/extern.h -+++ b/xen/drivers/passthrough/vtd/extern.h -@@ -76,7 +76,6 @@ int __must_check qinval_device_iotlb_sync(struct vtd_iommu *iommu, - struct pci_dev *pdev, - u16 did, u16 size, u64 addr); - --unsigned int get_cache_line_size(void); - void flush_all_cache(void); - - uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node); -diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index 8975c1de61bc..bc377c9bcfa4 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -206,54 +207,6 @@ static void check_cleanup_domid_map(const struct domain *d, - } - } - --static void sync_cache(const void *addr, unsigned int size) --{ -- static unsigned long clflush_size = 0; -- const void *end = addr + size; -- -- if ( clflush_size == 0 ) -- clflush_size = get_cache_line_size(); -- -- addr -= (unsigned long)addr & (clflush_size - 1); -- for ( ; addr < end; addr += clflush_size ) --/* -- * The arguments to a macro must not include preprocessor directives. Doing so -- * results in undefined behavior, so we have to create some defines here in -- * order to avoid it. 
-- */
--#if defined(HAVE_AS_CLWB)
--# define CLWB_ENCODING "clwb %[p]"
--#elif defined(HAVE_AS_XSAVEOPT)
--# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */
--#else
--# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */
--#endif
--
--#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr))
--#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT)
--# define INPUT BASE_INPUT
--#else
--# define INPUT(addr) "a" (addr), BASE_INPUT(addr)
--#endif
--        /*
--         * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush
--         * + prefix than a clflush + nop, and hence the prefix is added instead
--         * of letting the alternative framework fill the gap by appending nops.
--         */
--        alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]",
--                         "data16 clflush %[p]", /* clflushopt */
--                         X86_FEATURE_CLFLUSHOPT,
--                         CLWB_ENCODING,
--                         X86_FEATURE_CLWB, /* no outputs */,
--                         INPUT(addr));
--#undef INPUT
--#undef BASE_INPUT
--#undef CLWB_ENCODING
--
--    alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT,
--                  "sfence", X86_FEATURE_CLWB);
--}
--
- /* Allocate page table, return its machine address */
- uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
- {
-@@ -273,7 +226,7 @@ uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node)
-         clear_page(vaddr);
-
-         if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache )
--            sync_cache(vaddr, PAGE_SIZE);
-+            cache_writeback(vaddr, PAGE_SIZE);
-         unmap_domain_page(vaddr);
-         cur_pg++;
-     }
-@@ -1305,7 +1258,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd)
-     iommu->nr_pt_levels = agaw_to_level(agaw);
-
-     if ( !ecap_coherent(iommu->ecap) )
--        vtd_ops.sync_cache = sync_cache;
-+        vtd_ops.sync_cache = cache_writeback;
-
-     /* allocate domain id bitmap */
-     iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom));
-diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c b/xen/drivers/passthrough/vtd/x86/vtd.c
-index 6681dccd6970..55f0faa521cb 100644
---- a/xen/drivers/passthrough/vtd/x86/vtd.c
-+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
-@@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *va)
-     unmap_domain_page(va);
- }
-
--unsigned int get_cache_line_size(void)
--{
--    return ((cpuid_ebx(1) >> 8) & 0xff) * 8;
--}
--
- void flush_all_cache()
- {
-     wbinvd();
-diff --git a/xen/include/asm-x86/cache.h b/xen/include/asm-x86/cache.h
-index 1f7173d8c72c..e4770efb22b9 100644
---- a/xen/include/asm-x86/cache.h
-+++ b/xen/include/asm-x86/cache.h
-@@ -11,4 +11,11 @@
-
- #define __read_mostly __section(".data.read_mostly")
-
-+#ifndef __ASSEMBLY__
-+
-+void cache_flush(const void *addr, unsigned int size);
-+void cache_writeback(const void *addr, unsigned int size);
-+
-+#endif
-+
- #endif
diff --git a/xsa402-4.16-4.patch b/xsa402-4.16-4.patch
deleted file mode 100644
index 2110922..0000000
--- a/xsa402-4.16-4.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-From: Andrew Cooper
-Subject: x86/amd: Work around CLFLUSH ordering on older parts
-
-On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with everything,
-including reads and writes to the address, and LFENCE/SFENCE instructions.
-
-This creates a multitude of problematic corner cases, laid out in the manual.
-Arrange to use MFENCE on both sides of the CLFLUSH to force proper ordering.
-
-This is part of XSA-402.
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index a8e37dbb1f5c..b3b9a0df5fed 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -812,6 +812,14 @@ static void init_amd(struct cpuinfo_x86 *c) - if (!cpu_has_lfence_dispatch) - __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); - -+ /* -+ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with -+ * everything, including reads and writes to address, and -+ * LFENCE/SFENCE instructions. -+ */ -+ if (!cpu_has_clflushopt) -+ setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE); -+ - switch(c->x86) - { - case 0xf ... 0x11: -diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c -index 0c912b8669f8..dcbb4064012e 100644 ---- a/xen/arch/x86/flushtlb.c -+++ b/xen/arch/x86/flushtlb.c -@@ -259,6 +259,13 @@ unsigned int flush_area_local(const void *va, unsigned int flags) - return flags; - } - -+/* -+ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with everything, -+ * including reads and writes to address, and LFENCE/SFENCE instructions. -+ * -+ * This function only works safely after alternatives have run. Luckily, at -+ * the time of writing, we don't flush the caches that early. -+ */ - void cache_flush(const void *addr, unsigned int size) - { - /* -@@ -268,6 +275,8 @@ void cache_flush(const void *addr, unsigned int size) - unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16; - const void *end = addr + size; - -+ alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE); -+ - addr -= (unsigned long)addr & (clflush_size - 1); - for ( ; addr < end; addr += clflush_size ) - { -@@ -283,7 +292,9 @@ void cache_flush(const void *addr, unsigned int size) - [p] "m" (*(const char *)(addr))); - } - -- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); -+ alternative_2("", -+ "sfence", X86_FEATURE_CLFLUSHOPT, -+ "mfence", X86_BUG_CLFLUSH_MFENCE); - } - - void cache_writeback(const void *addr, unsigned int size) -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index 7413febd7ad8..ff3157d52d13 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -47,6 +47,7 @@ XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch - - #define X86_BUG_FPU_PTRS X86_BUG( 0) /* (F)X{SAVE,RSTOR} doesn't save/restore FOP/FIP/FDP. */ - #define X86_BUG_NULL_SEG X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */ -+#define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */ - - /* Total number of capability words, inc synth and bug words. */ - #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */ diff --git a/xsa402-4.16-5.patch b/xsa402-4.16-5.patch deleted file mode 100644 index 4806d25..0000000 --- a/xsa402-4.16-5.patch +++ /dev/null @@ -1,148 +0,0 @@ -From: Andrew Cooper -Subject: x86/pv: Track and flush non-coherent mappings of RAM - -There are legitimate uses of WC mappings of RAM, e.g. for DMA buffers with -devices that make non-coherent writes. The Linux sound subsystem makes -extensive use of this technique. - -For such usecases, the guest's DMA buffer is mapped and consistently used as -WC, and Xen doesn't interact with the buffer. - -However, a mischevious guest can use WC mappings to deliberately create -non-coherency between the cache and RAM, and use this to trick Xen into -validating a pagetable which isn't actually safe. 
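Reduced to a sketch, the defence described below is a sticky per-page marker plus a test-and-clear before the contents are trusted (all demo_* names are stand-ins; the real hunks use a new PGT_non_coherent bit in type_info together with Xen's bitops):

    #include <stdatomic.h>
    #include <stdbool.h>

    #define DEMO_NON_COHERENT (1u << 0)

    struct demo_page { _Atomic unsigned int type_flags; };

    /* Record that a writeable non-coherent (e.g. WC) mapping was created. */
    static void demo_mark_non_coherent(struct demo_page *pg)
    {
        atomic_fetch_or(&pg->type_flags, DEMO_NON_COHERENT);
    }

    /* Before validating the page as a pagetable, or freeing it: clear the
     * marker and report whether a cache flush is required first. */
    static bool demo_flush_needed(struct demo_page *pg)
    {
        return atomic_fetch_and(&pg->type_flags, ~DEMO_NON_COHERENT) &
               DEMO_NON_COHERENT;
    }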
- -Allocate a new PGT_non_coherent to track the non-coherency of mappings. Set -it whenever a non-coherent writeable mapping is created. If the page is used -as anything other than PGT_writable_page, force a cache flush before -validation. Also force a cache flush before the page is returned to the heap. - -This is CVE-2022-26364, part of XSA-402. - -Reported-by: Jann Horn -Signed-off-by: Andrew Cooper -Reviewed-by: George Dunlap -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index ab32d13a1a0d..bab9624fabb7 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -997,6 +997,15 @@ get_page_from_l1e( - return -EACCES; - } - -+ /* -+ * Track writeable non-coherent mappings to RAM pages, to trigger a cache -+ * flush later if the target is used as anything but a PGT_writeable page. -+ * We care about all writeable mappings, including foreign mappings. -+ */ -+ if ( !boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) && -+ (l1f & (PAGE_CACHE_ATTRS | _PAGE_RW)) == (_PAGE_WC | _PAGE_RW) ) -+ set_bit(_PGT_non_coherent, &page->u.inuse.type_info); -+ - return 0; - - could_not_pin: -@@ -2454,6 +2463,19 @@ static int cleanup_page_mappings(struct page_info *page) - } - } - -+ /* -+ * Flush the cache if there were previously non-coherent writeable -+ * mappings of this page. This forces the page to be coherent before it -+ * is freed back to the heap. -+ */ -+ if ( __test_and_clear_bit(_PGT_non_coherent, &page->u.inuse.type_info) ) -+ { -+ void *addr = __map_domain_page(page); -+ -+ cache_flush(addr, PAGE_SIZE); -+ unmap_domain_page(addr); -+ } -+ - return rc; - } - -@@ -3028,6 +3050,22 @@ static int _get_page_type(struct page_info *page, unsigned long type, - if ( unlikely(!(nx & PGT_validated)) ) - { - /* -+ * Flush the cache if there were previously non-coherent mappings of -+ * this page, and we're trying to use it as anything other than a -+ * writeable page. This forces the page to be coherent before we -+ * validate its contents for safety. -+ */ -+ if ( (nx & PGT_non_coherent) && type != PGT_writable_page ) -+ { -+ void *addr = __map_domain_page(page); -+ -+ cache_flush(addr, PAGE_SIZE); -+ unmap_domain_page(addr); -+ -+ page->u.inuse.type_info &= ~PGT_non_coherent; -+ } -+ -+ /* - * No special validation needed for writable or shared pages. Page - * tables and GDT/LDT need to have their contents audited. - * -diff --git a/xen/arch/x86/pv/grant_table.c b/xen/arch/x86/pv/grant_table.c -index 0325618c9883..81c72e61ed55 100644 ---- a/xen/arch/x86/pv/grant_table.c -+++ b/xen/arch/x86/pv/grant_table.c -@@ -109,7 +109,17 @@ int create_grant_pv_mapping(uint64_t addr, mfn_t frame, - - ol1e = *pl1e; - if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) ) -+ { -+ /* -+ * We always create mappings in this path. However, our caller, -+ * map_grant_ref(), only passes potentially non-zero cache_flags for -+ * MMIO frames, so this path doesn't create non-coherent mappings of -+ * RAM frames and there's no need to calculate PGT_non_coherent. -+ */ -+ ASSERT(!cache_flags || is_iomem_page(frame)); -+ - rc = GNTST_okay; -+ } - - out_unlock: - page_unlock(page); -@@ -294,7 +304,18 @@ int replace_grant_pv_mapping(uint64_t addr, mfn_t frame, - l1e_get_flags(ol1e), addr, grant_pte_flags); - - if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) ) -+ { -+ /* -+ * Generally, replace_grant_pv_mapping() is used to destroy mappings -+ * (n1le = l1e_empty()), but it can be a present mapping on the -+ * GNTABOP_unmap_and_replace path. 
-+ * -+ * In such cases, the PTE is fully transplanted from its old location -+ * via steal_linear_addr(), so we need not perform PGT_non_coherent -+ * checking here. -+ */ - rc = GNTST_okay; -+ } - - out_unlock: - page_unlock(page); -diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h -index 8a9a43bb0a9d..7464167ae192 100644 ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -53,8 +53,12 @@ - #define _PGT_partial PG_shift(8) - #define PGT_partial PG_mask(1, 8) - -+/* Has this page been mapped writeable with a non-coherent memory type? */ -+#define _PGT_non_coherent PG_shift(9) -+#define PGT_non_coherent PG_mask(1, 9) -+ - /* Count of uses of this frame as its current type. */ --#define PGT_count_width PG_shift(8) -+#define PGT_count_width PG_shift(9) - #define PGT_count_mask ((1UL< -Date: Thu, 30 Jun 2022 14:35:35 +0200 -Subject: [PATCH] tools/libxl: env variable to signal whether disk/nic backend - is trusted -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Introduce support in libxl for fetching the default backend trusted -option for disk and nic devices. - -Users can set libxl_{disk,nic}_backend_untrusted environment variable -to notify libxl of whether the backends for disk and nic devices -should be trusted. Such information is passed into the frontend so it -can take the appropriate measures. - -This is part of XSA-403. - -Signed-off-by: Roger Pau Monné ---- - tools/libs/light/libxl_disk.c | 3 +++ - tools/libs/light/libxl_nic.c | 3 +++ - 2 files changed, 6 insertions(+) - -diff --git a/tools/libs/light/libxl_disk.c b/tools/libs/light/libxl_disk.c -index 93936d0dd0..0aaf6afce8 100644 ---- a/tools/libs/light/libxl_disk.c -+++ b/tools/libs/light/libxl_disk.c -@@ -395,6 +395,9 @@ static void device_disk_add(libxl__egc *egc, uint32_t domid, - flexarray_append(front, GCSPRINTF("%d", device->devid)); - flexarray_append(front, "device-type"); - flexarray_append(front, disk->is_cdrom ? "cdrom" : "disk"); -+ flexarray_append(front, "trusted"); -+ flexarray_append(front, getenv("libxl_disk_backend_untrusted") ? "0" -+ : "1"); - - /* - * Old PV kernel disk frontends before 2.6.26 rely on tool stack to -diff --git a/tools/libs/light/libxl_nic.c b/tools/libs/light/libxl_nic.c -index 0b9e70c9d1..34c3fe6df0 100644 ---- a/tools/libs/light/libxl_nic.c -+++ b/tools/libs/light/libxl_nic.c -@@ -255,6 +255,9 @@ static int libxl__set_xenstore_nic(libxl__gc *gc, uint32_t domid, - flexarray_append(back, "hotplug-status"); - flexarray_append(back, ""); - -+ flexarray_append(front, "trusted"); -+ flexarray_append(front, getenv("libxl_nic_backend_untrusted") ? "0" : "1"); -+ - return 0; - } - --- -2.37.0 - diff --git a/xsa404-4.16-1.patch b/xsa404-4.16-1.patch deleted file mode 100644 index c101279..0000000 --- a/xsa404-4.16-1.patch +++ /dev/null @@ -1,239 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Make VERW flushing runtime conditional - -Currently, VERW flushing to mitigate MDS is boot time conditional per domain -type. However, to provide mitigations for DRPW (CVE-2022-21166), we need to -conditionally use VERW based on the trustworthiness of the guest, and the -devices passed through. - -Remove the PV/HVM alternatives and instead issue a VERW on the return-to-guest -path depending on the SCF_verw bit in cpuinfo spec_ctrl_flags. - -Introduce spec_ctrl_init_domain() and d->arch.verw to calculate the VERW -disposition at domain creation time, and context switch the SCF_verw bit. 
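The context-switch half of that, extracted as a sketch (the demo_* types are hypothetical; the SCF_verw value matches the hunk below):

    #include <stdbool.h>
    #include <stdint.h>

    #define SCF_verw (1u << 3)

    struct demo_cpu_info { uint8_t spec_ctrl_flags; };
    struct demo_domain   { bool verw; };

    /* Latch the incoming domain's VERW disposition into the per-CPU
     * top-of-stack block, so the exit-to-guest asm only tests one bit. */
    static void demo_ctxt_switch_verw(struct demo_cpu_info *info,
                                      const struct demo_domain *nextd)
    {
        info->spec_ctrl_flags &= ~SCF_verw;
        if ( nextd->verw )
            info->spec_ctrl_flags |= SCF_verw;
    }

Keeping the decision in a single flag byte means the fast path pays one TEST rather than a separately patched alternatives block per guest type.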
- -For now, VERW flushing is used and controlled exactly as before, but later -patches will add per-domain cases too. - -No change in behaviour. - -This is part of XSA-404. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich -Reviewed-by: Roger Pau Monné - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 1d08fb7e9aa6..d5cb09f86541 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2258,9 +2258,8 @@ in place for guests to use. - Use of a positive boolean value for either of these options is invalid. - - The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` offer fine --grained control over the alternative blocks used by Xen. These impact Xen's --ability to protect itself, and Xen's ability to virtualise support for guests --to use. -+grained control over the primitives by Xen. These impact Xen's ability to -+protect itself, and Xen's ability to virtualise support for guests to use. - - * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests - respectively. -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index ef1812dc1402..1fe6644a71ae 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -863,6 +863,8 @@ int arch_domain_create(struct domain *d, - - d->arch.msr_relaxed = config->arch.misc_flags & XEN_X86_MSR_RELAXED; - -+ spec_ctrl_init_domain(d); -+ - return 0; - - fail: -@@ -2017,14 +2019,15 @@ static void __context_switch(void) - void context_switch(struct vcpu *prev, struct vcpu *next) - { - unsigned int cpu = smp_processor_id(); -+ struct cpu_info *info = get_cpu_info(); - const struct domain *prevd = prev->domain, *nextd = next->domain; - unsigned int dirty_cpu = read_atomic(&next->dirty_cpu); - - ASSERT(prev != next); - ASSERT(local_irq_is_enabled()); - -- get_cpu_info()->use_pv_cr3 = false; -- get_cpu_info()->xen_cr3 = 0; -+ info->use_pv_cr3 = false; -+ info->xen_cr3 = 0; - - if ( unlikely(dirty_cpu != cpu) && dirty_cpu != VCPU_CPU_CLEAN ) - { -@@ -2088,6 +2091,11 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - *last_id = next_id; - } - } -+ -+ /* Update the top-of-stack block with the VERW disposition. */ -+ info->spec_ctrl_flags &= ~SCF_verw; -+ if ( nextd->arch.verw ) -+ info->spec_ctrl_flags |= SCF_verw; - } - - sched_context_switched(prev, next); -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index 49651f3c435a..5f5de45a1309 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -87,7 +87,7 @@ UNLIKELY_END(realmode) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ - /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */ -- ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM -+ DO_SPEC_CTRL_COND_VERW - - mov VCPU_hvm_guest_cr2(%rbx),%rax - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index c19464da70ce..21730aa03071 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -36,8 +36,8 @@ static bool __initdata opt_msr_sc_pv = true; - static bool __initdata opt_msr_sc_hvm = true; - static int8_t __initdata opt_rsb_pv = -1; - static bool __initdata opt_rsb_hvm = true; --static int8_t __initdata opt_md_clear_pv = -1; --static int8_t __initdata opt_md_clear_hvm = -1; -+static int8_t __read_mostly opt_md_clear_pv = -1; -+static int8_t __read_mostly opt_md_clear_hvm = -1; - - /* Cmdline controls for Xen's speculative settings. 
*/ - static enum ind_thunk { -@@ -932,6 +932,13 @@ static __init void mds_calculations(uint64_t caps) - } - } - -+void spec_ctrl_init_domain(struct domain *d) -+{ -+ bool pv = is_pv_domain(d); -+ -+ d->arch.verw = pv ? opt_md_clear_pv : opt_md_clear_hvm; -+} -+ - void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; -@@ -1196,21 +1203,20 @@ void __init init_speculation_mitigations(void) - boot_cpu_has(X86_FEATURE_MD_CLEAR)); - - /* -- * Enable MDS defences as applicable. The PV blocks need using all the -- * time, and the Idle blocks need using if either PV or HVM defences are -- * used. -+ * Enable MDS defences as applicable. The Idle blocks need using if -+ * either PV or HVM defences are used. - * - * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with -- * equivelent semantics to avoid needing to perform both flushes on the -- * HVM path. The HVM blocks don't need activating if our hypervisor told -- * us it was handling L1D_FLUSH, or we are using L1D_FLUSH ourselves. -+ * equivalent semantics to avoid needing to perform both flushes on the -+ * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH. -+ * -+ * After calculating the appropriate idle setting, simplify -+ * opt_md_clear_hvm to mean just "should we VERW on the way into HVM -+ * guests", so spec_ctrl_init_domain() can calculate suitable settings. - */ -- if ( opt_md_clear_pv ) -- setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV); - if ( opt_md_clear_pv || opt_md_clear_hvm ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); -- if ( opt_md_clear_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush ) -- setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM); -+ opt_md_clear_hvm &= !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush; - - /* - * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index ff3157d52d13..bd45a144ee78 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -35,8 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM - XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ - XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */ - XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ --XEN_CPUFEATURE(SC_VERW_PV, X86_SYNTH(23)) /* VERW used by Xen for PV */ --XEN_CPUFEATURE(SC_VERW_HVM, X86_SYNTH(24)) /* VERW used by Xen for HVM */ -+/* Bits 23,24 unused. */ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ - XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ - XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */ -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 92d54de0b9a1..2398a1d99da9 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -319,6 +319,9 @@ struct arch_domain - uint32_t pci_cf8; - uint8_t cmos_idx; - -+ /* Use VERW on return-to-guest for its flushing side effect. 
*/ -+ bool verw; -+ - union { - struct pv_domain pv; - struct hvm_domain hvm; -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index f76029523610..751355f471f4 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -24,6 +24,7 @@ - #define SCF_use_shadow (1 << 0) - #define SCF_ist_wrmsr (1 << 1) - #define SCF_ist_rsb (1 << 2) -+#define SCF_verw (1 << 3) - - #ifndef __ASSEMBLY__ - -@@ -32,6 +33,7 @@ - #include - - void init_speculation_mitigations(void); -+void spec_ctrl_init_domain(struct domain *d); - - extern bool opt_ibpb; - extern bool opt_ssbd; -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 02b3b18ce69f..5a590bac44aa 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -136,6 +136,19 @@ - #endif - .endm - -+.macro DO_SPEC_CTRL_COND_VERW -+/* -+ * Requires %rsp=cpuinfo -+ * -+ * Issue a VERW for its flushing side effect, if indicated. This is a Spectre -+ * v1 gadget, but the IRET/VMEntry is serialising. -+ */ -+ testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp) -+ jz .L\@_verw_skip -+ verw CPUINFO_verw_sel(%rsp) -+.L\@_verw_skip: -+.endm -+ - .macro DO_SPEC_CTRL_ENTRY maybexen:req - /* - * Requires %rsp=regs (also cpuinfo if !maybexen) -@@ -231,8 +244,7 @@ - #define SPEC_CTRL_EXIT_TO_PV \ - ALTERNATIVE "", \ - DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \ -- ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), \ -- X86_FEATURE_SC_VERW_PV -+ DO_SPEC_CTRL_COND_VERW - - /* - * Use in IST interrupt/exception context. May interrupt Xen or PV context. diff --git a/xsa404-4.16-2.patch b/xsa404-4.16-2.patch deleted file mode 100644 index 732b26a..0000000 --- a/xsa404-4.16-2.patch +++ /dev/null @@ -1,85 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Enumeration for MMIO Stale Data controls - -The three *_NO bits indicate non-susceptibility to the SSDP, FBSDP and PSDP -data movement primitives. - -FB_CLEAR indicates that the VERW instruction has re-gained it's Fill Buffer -flushing side effect. This is only enumerated on parts where VERW had -previously lost it's flushing side effect due to the MDS/TAA vulnerabilities -being fixed in hardware. - -FB_CLEAR_CTRL is available on a subset of FB_CLEAR parts where the Fill Buffer -clearing side effect of VERW can be turned off for performance reasons. - -This is part of XSA-404. - -Signed-off-by: Andrew Cooper -Reviewed-by: Roger Pau Monné - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 21730aa03071..d285538bde9f 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -323,7 +323,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - * Hardware read-only information, stating immunity to certain issues, or - * suggestions of which mitigation to use. - */ -- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", - (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", - (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", -@@ -332,13 +332,16 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : "", - (caps & ARCH_CAPS_MDS_NO) ? " MDS_NO" : "", - (caps & ARCH_CAPS_TAA_NO) ? " TAA_NO" : "", -+ (caps & ARCH_CAPS_SBDR_SSDP_NO) ? " SBDR_SSDP_NO" : "", -+ (caps & ARCH_CAPS_FBSDP_NO) ? " FBSDP_NO" : "", -+ (caps & ARCH_CAPS_PSDP_NO) ? 
" PSDP_NO" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : ""); - - /* Hardware features which need driving to mitigate issues. */ -- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n", - (e8b & cpufeat_mask(X86_FEATURE_IBPB)) || - (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS)) || -@@ -353,7 +356,9 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (_7d0 & cpufeat_mask(X86_FEATURE_MD_CLEAR)) ? " MD_CLEAR" : "", - (_7d0 & cpufeat_mask(X86_FEATURE_SRBDS_CTRL)) ? " SRBDS_CTRL" : "", - (e8b & cpufeat_mask(X86_FEATURE_VIRT_SSBD)) ? " VIRT_SSBD" : "", -- (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : ""); -+ (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "", -+ (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "", -+ (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : ""); - - /* Compiled-in support which pertains to mitigations. */ - if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) -diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h -index 31964b88af7a..72bc32ba04ff 100644 ---- a/xen/include/asm-x86/msr-index.h -+++ b/xen/include/asm-x86/msr-index.h -@@ -66,6 +66,11 @@ - #define ARCH_CAPS_IF_PSCHANGE_MC_NO (_AC(1, ULL) << 6) - #define ARCH_CAPS_TSX_CTRL (_AC(1, ULL) << 7) - #define ARCH_CAPS_TAA_NO (_AC(1, ULL) << 8) -+#define ARCH_CAPS_SBDR_SSDP_NO (_AC(1, ULL) << 13) -+#define ARCH_CAPS_FBSDP_NO (_AC(1, ULL) << 14) -+#define ARCH_CAPS_PSDP_NO (_AC(1, ULL) << 15) -+#define ARCH_CAPS_FB_CLEAR (_AC(1, ULL) << 17) -+#define ARCH_CAPS_FB_CLEAR_CTRL (_AC(1, ULL) << 18) - - #define MSR_FLUSH_CMD 0x0000010b - #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) -@@ -83,6 +88,7 @@ - #define MCU_OPT_CTRL_RNGDS_MITG_DIS (_AC(1, ULL) << 0) - #define MCU_OPT_CTRL_RTM_ALLOW (_AC(1, ULL) << 1) - #define MCU_OPT_CTRL_RTM_LOCKED (_AC(1, ULL) << 2) -+#define MCU_OPT_CTRL_FB_CLEAR_DIS (_AC(1, ULL) << 3) - - #define MSR_RTIT_OUTPUT_BASE 0x00000560 - #define MSR_RTIT_OUTPUT_MASK 0x00000561 diff --git a/xsa404-4.16-3.patch b/xsa404-4.16-3.patch deleted file mode 100644 index dab0c31..0000000 --- a/xsa404-4.16-3.patch +++ /dev/null @@ -1,177 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Add spec-ctrl=unpriv-mmio - -Per Xen's support statement, PCI passthrough should be to trusted domains -because the overall system security depends on factors outside of Xen's -control. - -As such, Xen, in a supported configuration, is not vulnerable to DRPW/SBDR. - -However, users who have risk assessed their configuration may be happy with -the risk of DoS, but unhappy with the risk of cross-domain data leakage. Such -users should enable this option. - -On CPUs vulnerable to MDS, the existing mitigations are the best we can do to -mitigate MMIO cross-domain data leakage. - -On CPUs fixed to MDS but vulnerable MMIO stale data leakage, this option: - - * On CPUs susceptible to FBSDP, mitigates cross-domain fill buffer leakage - using FB_CLEAR. - * On CPUs susceptible to SBDR, mitigates RNG data recovery by engaging the - srb-lock, previously used to mitigate SRBDS. - -Both mitigations require microcode from IPU 2022.1, May 2022. - -This is part of XSA-404. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Roger Pau Monné ---- -Backporting note: For Xen 4.7 and earlier with bool_t not aliasing bool, the -ARCH_CAPS_FB_CLEAR hunk needs !! - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index d5cb09f86541..a642e43476a2 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2235,7 +2235,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - ### spec-ctrl (x86) - > `= List of [ , xen=, {pv,hvm,msr-sc,rsb,md-clear}=, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu, --> l1d-flush,branch-harden,srb-lock}= ]` -+> l1d-flush,branch-harden,srb-lock,unpriv-mmio}= ]` - - Controls for speculative execution sidechannel mitigations. By default, Xen - will pick the most appropriate mitigations based on compiled in support, -@@ -2314,8 +2314,16 @@ Xen will enable this mitigation. - On hardware supporting SRBDS_CTRL, the `srb-lock=` option can be used to force - or prevent Xen from protect the Special Register Buffer from leaking stale - data. By default, Xen will enable this mitigation, except on parts where MDS --is fixed and TAA is fixed/mitigated (in which case, there is believed to be no --way for an attacker to obtain the stale data). -+is fixed and TAA is fixed/mitigated and there are no unprivileged MMIO -+mappings (in which case, there is believed to be no way for an attacker to -+obtain stale data). -+ -+The `unpriv-mmio=` boolean indicates whether the system has (or will have) -+less than fully privileged domains granted access to MMIO devices. By -+default, this option is disabled. If enabled, Xen will use the `FB_CLEAR` -+and/or `SRBDS_CTRL` functionality available in the Intel May 2022 microcode -+release to mitigate cross-domain leakage of data via the MMIO Stale Data -+vulnerabilities. - - ### sync_console - > `= ` -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index d285538bde9f..099113ba41e6 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -67,6 +67,8 @@ static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */ - static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. */ - - static int8_t __initdata opt_srb_lock = -1; -+static bool __initdata opt_unpriv_mmio; -+static bool __read_mostly opt_fb_clear_mmio; - - static int __init parse_spec_ctrl(const char *s) - { -@@ -184,6 +186,8 @@ static int __init parse_spec_ctrl(const char *s) - opt_branch_harden = val; - else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) - opt_srb_lock = val; -+ else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) -+ opt_unpriv_mmio = val; - else - rc = -EINVAL; - -@@ -392,7 +396,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", - opt_ibpb ? " IBPB" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", -- opt_md_clear_pv || opt_md_clear_hvm ? " VERW" : "", -+ opt_md_clear_pv || opt_md_clear_hvm || -+ opt_fb_clear_mmio ? " VERW" : "", - opt_branch_harden ? " BRANCH_HARDEN" : ""); - - /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ -@@ -941,7 +946,9 @@ void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); - -- d->arch.verw = pv ? opt_md_clear_pv : opt_md_clear_hvm; -+ d->arch.verw = -+ (pv ? 
opt_md_clear_pv : opt_md_clear_hvm) || -+ (opt_fb_clear_mmio && is_iommu_enabled(d)); - } - - void __init init_speculation_mitigations(void) -@@ -1196,6 +1203,18 @@ void __init init_speculation_mitigations(void) - mds_calculations(caps); - - /* -+ * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have -+ * reintroduced the VERW fill buffer flushing side effect because of a -+ * susceptibility to FBSDP. -+ * -+ * If unprivileged guests have (or will have) MMIO mappings, we can -+ * mitigate cross-domain leakage of fill buffer data by issuing VERW on -+ * the return-to-guest path. -+ */ -+ if ( opt_unpriv_mmio ) -+ opt_fb_clear_mmio = caps & ARCH_CAPS_FB_CLEAR; -+ -+ /* - * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. - * This will only be a token effort for MLPDS/MFBDS when HT is enabled, - * but it is somewhat better than nothing. -@@ -1208,18 +1227,20 @@ void __init init_speculation_mitigations(void) - boot_cpu_has(X86_FEATURE_MD_CLEAR)); - - /* -- * Enable MDS defences as applicable. The Idle blocks need using if -- * either PV or HVM defences are used. -+ * Enable MDS/MMIO defences as applicable. The Idle blocks need using if -+ * either the PV or HVM MDS defences are used, or if we may give MMIO -+ * access to untrusted guests. - * - * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with - * equivalent semantics to avoid needing to perform both flushes on the -- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH. -+ * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for -+ * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) - * - * After calculating the appropriate idle setting, simplify - * opt_md_clear_hvm to mean just "should we VERW on the way into HVM - * guests", so spec_ctrl_init_domain() can calculate suitable settings. - */ -- if ( opt_md_clear_pv || opt_md_clear_hvm ) -+ if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); - opt_md_clear_hvm &= !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush; - -@@ -1284,14 +1305,19 @@ void __init init_speculation_mitigations(void) - * On some SRBDS-affected hardware, it may be safe to relax srb-lock by - * default. - * -- * On parts which enumerate MDS_NO and not TAA_NO, TSX is the only known -- * way to access the Fill Buffer. If TSX isn't available (inc. SKU -- * reasons on some models), or TSX is explicitly disabled, then there is -- * no need for the extra overhead to protect RDRAND/RDSEED. -+ * All parts with SRBDS_CTRL suffer SSDP, the mechanism by which stale RNG -+ * data becomes available to other contexts. To recover the data, an -+ * attacker needs to use: -+ * - SBDS (MDS or TAA to sample the cores fill buffer) -+ * - SBDR (Architecturally retrieve stale transaction buffer contents) -+ * - DRPW (Architecturally latch stale fill buffer data) -+ * -+ * On MDS_NO parts, and with TAA_NO or TSX unavailable/disabled, and there -+ * is no unprivileged MMIO access, the RNG data doesn't need protecting. 
- */ - if ( cpu_has_srbds_ctrl ) - { -- if ( opt_srb_lock == -1 && -+ if ( opt_srb_lock == -1 && !opt_unpriv_mmio && - (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO && - (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && rtm_disabled)) ) - opt_srb_lock = 0; diff --git a/xsa407-4.16-1.patch b/xsa407-4.16-1.patch deleted file mode 100644 index d6dc7fc..0000000 --- a/xsa407-4.16-1.patch +++ /dev/null @@ -1,153 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Rework spec_ctrl_flags context switching - -We are shortly going to need to context switch new bits in both the vcpu and -S3 paths. Introduce SCF_IST_MASK and SCF_DOM_MASK, and rework d->arch.verw -into d->arch.spec_ctrl_flags to accommodate. - -No functional change. - -This is part of XSA-407. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c -index 5eaa77f66a28..dd397f713067 100644 ---- a/xen/arch/x86/acpi/power.c -+++ b/xen/arch/x86/acpi/power.c -@@ -248,8 +248,8 @@ static int enter_state(u32 state) - error = 0; - - ci = get_cpu_info(); -- /* Avoid NMI/#MC using MSR_SPEC_CTRL until we've reloaded microcode. */ -- ci->spec_ctrl_flags &= ~SCF_ist_wrmsr; -+ /* Avoid NMI/#MC using unsafe MSRs until we've reloaded microcode. */ -+ ci->spec_ctrl_flags &= ~SCF_IST_MASK; - - ACPI_FLUSH_CPU_CACHE(); - -@@ -292,8 +292,8 @@ static int enter_state(u32 state) - if ( !recheck_cpu_features(0) ) - panic("Missing previously available feature(s)\n"); - -- /* Re-enabled default NMI/#MC use of MSR_SPEC_CTRL. */ -- ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_ist_wrmsr); -+ /* Re-enabled default NMI/#MC use of MSRs now microcode is loaded. */ -+ ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_IST_MASK); - - if ( boot_cpu_has(X86_FEATURE_IBRSB) || boot_cpu_has(X86_FEATURE_IBRS) ) - { -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 1fe6644a71ae..82a0b73cf6ef 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -2092,10 +2092,10 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - } - } - -- /* Update the top-of-stack block with the VERW disposition. */ -- info->spec_ctrl_flags &= ~SCF_verw; -- if ( nextd->arch.verw ) -- info->spec_ctrl_flags |= SCF_verw; -+ /* Update the top-of-stack block with the new spec_ctrl settings. */ -+ info->spec_ctrl_flags = -+ (info->spec_ctrl_flags & ~SCF_DOM_MASK) | -+ (nextd->arch.spec_ctrl_flags & SCF_DOM_MASK); - } - - sched_context_switched(prev, next); -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 9507e5da60a9..7e646680f1c7 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1010,9 +1010,12 @@ void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); - -- d->arch.verw = -- (pv ? opt_md_clear_pv : opt_md_clear_hvm) || -- (opt_fb_clear_mmio && is_iommu_enabled(d)); -+ bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) || -+ (opt_fb_clear_mmio && is_iommu_enabled(d))); -+ -+ d->arch.spec_ctrl_flags = -+ (verw ? SCF_verw : 0) | -+ 0; - } - - void __init init_speculation_mitigations(void) -diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h -index 2398a1d99da9..e4c099262cb7 100644 ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -319,8 +319,7 @@ struct arch_domain - uint32_t pci_cf8; - uint8_t cmos_idx; - -- /* Use VERW on return-to-guest for its flushing side effect. 
*/ -- bool verw; -+ uint8_t spec_ctrl_flags; /* See SCF_DOM_MASK */ - - union { - struct pv_domain pv; -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 7e83e0179fb9..3cd72e40305f 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -20,12 +20,40 @@ - #ifndef __X86_SPEC_CTRL_H__ - #define __X86_SPEC_CTRL_H__ - --/* Encoding of cpuinfo.spec_ctrl_flags */ -+/* -+ * Encoding of: -+ * cpuinfo.spec_ctrl_flags -+ * default_spec_ctrl_flags -+ * domain.spec_ctrl_flags -+ * -+ * Live settings are in the top-of-stack block, because they need to be -+ * accessable when XPTI is active. Some settings are fixed from boot, some -+ * context switched per domain, and some inhibited in the S3 path. -+ */ - #define SCF_use_shadow (1 << 0) - #define SCF_ist_wrmsr (1 << 1) - #define SCF_ist_rsb (1 << 2) - #define SCF_verw (1 << 3) - -+/* -+ * The IST paths (NMI/#MC) can interrupt any arbitrary context. Some -+ * functionality requires updated microcode to work. -+ * -+ * On boot, this is easy; we load microcode before figuring out which -+ * speculative protections to apply. However, on the S3 resume path, we must -+ * be able to disable the configured mitigations until microcode is reloaded. -+ * -+ * These are the controls to inhibit on the S3 resume path until microcode has -+ * been reloaded. -+ */ -+#define SCF_IST_MASK (SCF_ist_wrmsr) -+ -+/* -+ * Some speculative protections are per-domain. These settings are merged -+ * into the top-of-stack block in the context switch path. -+ */ -+#define SCF_DOM_MASK (SCF_verw) -+ - #ifndef __ASSEMBLY__ - - #include -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 5a590bac44aa..66b00d511fc6 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -248,9 +248,6 @@ - - /* - * Use in IST interrupt/exception context. May interrupt Xen or PV context. -- * Fine grain control of SCF_ist_wrmsr is needed for safety in the S3 resume -- * path to avoid using MSR_SPEC_CTRL before the microcode introducing it has -- * been reloaded. - */ - .macro SPEC_CTRL_ENTRY_FROM_INTR_IST - /* diff --git a/xsa407-4.16-2.patch b/xsa407-4.16-2.patch deleted file mode 100644 index 012d028..0000000 --- a/xsa407-4.16-2.patch +++ /dev/null @@ -1,99 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Rename SCF_ist_wrmsr to SCF_ist_sc_msr - -We are about to introduce SCF_ist_ibpb, at which point SCF_ist_wrmsr becomes -ambiguous. - -No functional change. - -This is part of XSA-407. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 7e646680f1c7..89f95c083e1b 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1115,7 +1115,7 @@ void __init init_speculation_mitigations(void) - { - if ( opt_msr_sc_pv ) - { -- default_spec_ctrl_flags |= SCF_ist_wrmsr; -+ default_spec_ctrl_flags |= SCF_ist_sc_msr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV); - } - -@@ -1126,7 +1126,7 @@ void __init init_speculation_mitigations(void) - * Xen's value is not restored atomically. An early NMI hitting - * the VMExit path needs to restore Xen's value for safety. - */ -- default_spec_ctrl_flags |= SCF_ist_wrmsr; -+ default_spec_ctrl_flags |= SCF_ist_sc_msr; - setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM); - } - } -@@ -1139,7 +1139,7 @@ void __init init_speculation_mitigations(void) - * on real hardware matches the availability of MSR_SPEC_CTRL in the - * first place. 
- * -- * No need for SCF_ist_wrmsr because Xen's value is restored -+ * No need for SCF_ist_sc_msr because Xen's value is restored - * atomically WRT NMIs in the VMExit path. - * - * TODO: Adjust cpu_has_svm_spec_ctrl to be usable earlier on boot. -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index 3cd72e40305f..f8f0ac47e759 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -31,7 +31,7 @@ - * context switched per domain, and some inhibited in the S3 path. - */ - #define SCF_use_shadow (1 << 0) --#define SCF_ist_wrmsr (1 << 1) -+#define SCF_ist_sc_msr (1 << 1) - #define SCF_ist_rsb (1 << 2) - #define SCF_verw (1 << 3) - -@@ -46,7 +46,7 @@ - * These are the controls to inhibit on the S3 resume path until microcode has - * been reloaded. - */ --#define SCF_IST_MASK (SCF_ist_wrmsr) -+#define SCF_IST_MASK (SCF_ist_sc_msr) - - /* - * Some speculative protections are per-domain. These settings are merged -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 66b00d511fc6..0ff1b118f882 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -266,8 +266,8 @@ - - .L\@_skip_rsb: - -- test $SCF_ist_wrmsr, %al -- jz .L\@_skip_wrmsr -+ test $SCF_ist_sc_msr, %al -+ jz .L\@_skip_msr_spec_ctrl - - xor %edx, %edx - testb $3, UREGS_cs(%rsp) -@@ -290,7 +290,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - * to speculate around the WRMSR. As a result, we need a dispatch - * serialising instruction in the else clause. - */ --.L\@_skip_wrmsr: -+.L\@_skip_msr_spec_ctrl: - lfence - UNLIKELY_END(\@_serialise) - .endm -@@ -301,7 +301,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - * Requires %rbx=stack_end - * Clobbers %rax, %rcx, %rdx - */ -- testb $SCF_ist_wrmsr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) -+ testb $SCF_ist_sc_msr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx) - jz .L\@_skip - - DO_SPEC_CTRL_EXIT_TO_XEN diff --git a/xsa407-4.16-3.patch b/xsa407-4.16-3.patch deleted file mode 100644 index c6f7e6b..0000000 --- a/xsa407-4.16-3.patch +++ /dev/null @@ -1,86 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Rename opt_ibpb to opt_ibpb_ctxt_switch - -We are about to introduce the use of IBPB at different points in Xen, making -opt_ibpb ambiguous. Rename it to opt_ibpb_ctxt_switch. - -No functional change. 
- -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c -index 82a0b73cf6ef..0d39981550ca 100644 ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -2064,7 +2064,7 @@ void context_switch(struct vcpu *prev, struct vcpu *next) - - ctxt_switch_levelling(next); - -- if ( opt_ibpb && !is_idle_domain(nextd) ) -+ if ( opt_ibpb_ctxt_switch && !is_idle_domain(nextd) ) - { - static DEFINE_PER_CPU(unsigned int, last); - unsigned int *last_id = &this_cpu(last); -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 89f95c083e1b..f4ae36eae2d0 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -54,7 +54,7 @@ int8_t __initdata opt_stibp = -1; - bool __read_mostly opt_ssbd; - int8_t __initdata opt_psfd = -1; - --bool __read_mostly opt_ibpb = true; -+bool __read_mostly opt_ibpb_ctxt_switch = true; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = true; -@@ -117,7 +117,7 @@ static int __init parse_spec_ctrl(const char *s) - - opt_thunk = THUNK_JMP; - opt_ibrs = 0; -- opt_ibpb = false; -+ opt_ibpb_ctxt_switch = false; - opt_ssbd = false; - opt_l1d_flush = 0; - opt_branch_harden = false; -@@ -238,7 +238,7 @@ static int __init parse_spec_ctrl(const char *s) - - /* Misc settings. */ - else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) -- opt_ibpb = val; -+ opt_ibpb_ctxt_switch = val; - else if ( (val = parse_boolean("eager-fpu", s, ss)) >= 0 ) - opt_eager_fpu = val; - else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 ) -@@ -458,7 +458,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) - (opt_tsx & 1) ? " TSX+" : " TSX-", - !cpu_has_srbds_ctrl ? "" : - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", -- opt_ibpb ? " IBPB" : "", -+ opt_ibpb_ctxt_switch ? " IBPB-ctxt" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", - opt_md_clear_pv || opt_md_clear_hvm || - opt_fb_clear_mmio ? " VERW" : "", -@@ -1240,7 +1240,7 @@ void __init init_speculation_mitigations(void) - - /* Check we have hardware IBPB support before using it... */ - if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) ) -- opt_ibpb = false; -+ opt_ibpb_ctxt_switch = false; - - /* Check whether Eager FPU should be enabled by default. */ - if ( opt_eager_fpu == -1 ) -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index f8f0ac47e759..fb4365575620 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -63,7 +63,7 @@ - void init_speculation_mitigations(void); - void spec_ctrl_init_domain(struct domain *d); - --extern bool opt_ibpb; -+extern bool opt_ibpb_ctxt_switch; - extern bool opt_ssbd; - extern int8_t opt_eager_fpu; - extern int8_t opt_l1d_flush; diff --git a/xsa407-4.16-4.patch b/xsa407-4.16-4.patch deleted file mode 100644 index 50e5f0f..0000000 --- a/xsa407-4.16-4.patch +++ /dev/null @@ -1,96 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Rework SPEC_CTRL_ENTRY_FROM_INTR_IST - -We are shortly going to add a conditional IBPB in this path. - -Therefore, we cannot hold spec_ctrl_flags in %eax, and rely on only clobbering -it after we're done with its contents. %rbx is available for use, and the -more normal register to hold preserved information in. - -With %rax freed up, use it instead of %rdx for the RSB tmp register, and for -the adjustment to spec_ctrl_flags. - -This leaves no use of %rdx, except as 0 for the upper half of WRMSR. 
In -practice, %rdx is 0 from SAVE_ALL on all paths and isn't likely to change in -the foreseeable future, so update the macro entry requirements to state this -dependency. This marginal optimisation can be revisited if circumstances -change. - -No practical change. - -This is part of XSA-407. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 2a86938f1f32..a1810bf4d311 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -932,7 +932,7 @@ ENTRY(double_fault) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx -@@ -968,7 +968,7 @@ handle_ist_exception: - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR_IST /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: abcd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 0ff1b118f882..15e24cde00d1 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -251,34 +251,33 @@ - */ - .macro SPEC_CTRL_ENTRY_FROM_INTR_IST - /* -- * Requires %rsp=regs, %r14=stack_end -- * Clobbers %rax, %rcx, %rdx -+ * Requires %rsp=regs, %r14=stack_end, %rdx=0 -+ * Clobbers %rax, %rbx, %rcx, %rdx - * - * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY - * maybexen=1, but with conditionals rather than alternatives. - */ -- movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %eax -+ movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx - -- test $SCF_ist_rsb, %al -+ test $SCF_ist_rsb, %bl - jz .L\@_skip_rsb - -- DO_OVERWRITE_RSB tmp=rdx /* Clobbers %rcx/%rdx */ -+ DO_OVERWRITE_RSB /* Clobbers %rax/%rcx */ - - .L\@_skip_rsb: - -- test $SCF_ist_sc_msr, %al -+ test $SCF_ist_sc_msr, %bl - jz .L\@_skip_msr_spec_ctrl - -- xor %edx, %edx -+ xor %eax, %eax - testb $3, UREGS_cs(%rsp) -- setnz %dl -- not %edx -- and %dl, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ setnz %al -+ not %eax -+ and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) - - /* Load Xen's intended value. */ - mov $MSR_SPEC_CTRL, %ecx - movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax -- xor %edx, %edx - wrmsr - - /* Opencoded UNLIKELY_START() with no condition. */ diff --git a/xsa407-4.16-5.patch b/xsa407-4.16-5.patch deleted file mode 100644 index ef6da7c..0000000 --- a/xsa407-4.16-5.patch +++ /dev/null @@ -1,285 +0,0 @@ -From: Andrew Cooper -Subject: x86/spec-ctrl: Support IBPB-on-entry - -We are going to need this to mitigate Branch Type Confusion on AMD/Hygon CPUs, -but as we've talked about using it in other cases too, arrange to support it -generally. However, this is also very expensive in some cases, so we're going -to want per-domain controls. - -Introduce SCF_ist_ibpb and SCF_entry_ibpb controls, adding them to the IST and -DOM masks as appropriate. Also introduce X86_FEATURE_IBPB_ENTRY_{PV,HVM} to -to patch the code blocks. - -For SVM, the STGI is serialising enough to protect against Spectre-v1 attacks, -so no "else lfence" is necessary. VT-x will use use the MSR host load list, -so doesn't need any code in the VMExit path. 
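The primitive itself, as a freestanding sketch (the MSR index and IBPB bit are architectural; the entry-path asm below issues the same WRMSR conditionally on the new SCF_ bits, and it must execute at CPL0):

    #include <stdint.h>

    #define DEMO_MSR_PRED_CMD  0x00000049u
    #define DEMO_PRED_CMD_IBPB (1u << 0)

    /* Flush indirect branch prediction state via IA32_PRED_CMD.IBPB. */
    static inline void demo_ibpb(void)
    {
        asm volatile ("wrmsr" :: "c" (DEMO_MSR_PRED_CMD),
                                 "a" (DEMO_PRED_CMD_IBPB), "d" (0u));
    }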
- -For the IST path, we can't safely check CPL==0 to skip a flush, as we might -have hit an entry path before it's IBPB. As IST hitting Xen is rare, flush -irrespective of CPL. A later path, SCF_ist_sc_msr, provides Spectre-v1 -safety. - -For the PV paths, we know we're interrupting CPL>0, while for the INTR paths, -we can safely check CPL==0. Only flush when interrupting guest context. - -An "else lfence" is needed for safety, but we want to be able to skip it on -unaffected CPUs, so the block wants to be an alternative, which means the -lfence has to be inline rather than UNLIKELY() (the replacement block doesn't -have displacements fixed up for anything other than the first instruction). - -As with SPEC_CTRL_ENTRY_FROM_INTR_IST, %rdx is 0 on entry so rely on this to -shrink the logic marginally. Update the comments to specify this new -dependency. - -This is part of XSA-407. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - -diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S -index 4ae55a2ef605..0ff4008060fa 100644 ---- a/xen/arch/x86/hvm/svm/entry.S -+++ b/xen/arch/x86/hvm/svm/entry.S -@@ -97,7 +97,19 @@ __UNLIKELY_END(nsvm_hap) - - GET_CURRENT(bx) - -- /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo Clob: acd */ -+ /* SPEC_CTRL_ENTRY_FROM_SVM Req: %rsp=regs/cpuinfo, %rdx=0 Clob: acd */ -+ -+ .macro svm_vmexit_cond_ibpb -+ testb $SCF_entry_ibpb, CPUINFO_xen_spec_ctrl(%rsp) -+ jz .L_skip_ibpb -+ -+ mov $MSR_PRED_CMD, %ecx -+ mov $PRED_CMD_IBPB, %eax -+ wrmsr -+.L_skip_ibpb: -+ .endm -+ ALTERNATIVE "", svm_vmexit_cond_ibpb, X86_FEATURE_IBPB_ENTRY_HVM -+ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM - - .macro svm_vmexit_spec_ctrl -@@ -114,6 +126,10 @@ __UNLIKELY_END(nsvm_hap) - ALTERNATIVE "", svm_vmexit_spec_ctrl, X86_FEATURE_SC_MSR_HVM - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - -+ /* -+ * STGI is executed unconditionally, and is sufficiently serialising -+ * to safely resolve any Spectre-v1 concerns in the above logic. -+ */ - stgi - GLOBAL(svm_stgi_label) - mov %rsp,%rdi -diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c -index f9f9bc18cdbc..dd817cee4e69 100644 ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -1345,6 +1345,10 @@ static int construct_vmcs(struct vcpu *v) - rc = vmx_add_msr(v, MSR_FLUSH_CMD, FLUSH_CMD_L1D, - VMX_MSR_GUEST_LOADONLY); - -+ if ( !rc && (d->arch.spec_ctrl_flags & SCF_entry_ibpb) ) -+ rc = vmx_add_msr(v, MSR_PRED_CMD, PRED_CMD_IBPB, -+ VMX_MSR_HOST); -+ - out: - vmx_vmcs_exit(v); - -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 5fd6dbbd4513..b86d38d1c50d 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -18,7 +18,7 @@ ENTRY(entry_int82) - movl $HYPERCALL_VECTOR, 4(%rsp) - SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. 
*/ - - CR4_PV32_RESTORE -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index a1810bf4d311..fba8ae498f74 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -260,7 +260,7 @@ ENTRY(lstar_enter) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -298,7 +298,7 @@ ENTRY(cstar_enter) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -338,7 +338,7 @@ GLOBAL(sysenter_eflags_saved) - movl $TRAP_syscall, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -392,7 +392,7 @@ ENTRY(int80_direct_trap) - movl $0x80, 4(%rsp) - SAVE_ALL - -- SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - GET_STACK_END(bx) -@@ -674,7 +674,7 @@ ENTRY(common_interrupt) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -@@ -708,7 +708,7 @@ GLOBAL(handle_exception) - - GET_STACK_END(14) - -- SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, Clob: acd */ -+ SPEC_CTRL_ENTRY_FROM_INTR /* Req: %rsp=regs, %r14=end, %rdx=0, Clob: acd */ - /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */ - - mov STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rcx -diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h -index 493d338a085e..672c9ee22ba2 100644 ---- a/xen/include/asm-x86/cpufeatures.h -+++ b/xen/include/asm-x86/cpufeatures.h -@@ -39,6 +39,8 @@ XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ - XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ - XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ - XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */ -+XEN_CPUFEATURE(IBPB_ENTRY_PV, X86_SYNTH(28)) /* MSR_PRED_CMD used by Xen for PV */ -+XEN_CPUFEATURE(IBPB_ENTRY_HVM, X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for HVM */ - - /* Bug words follow the synthetic words. */ - #define X86_NR_BUG 1 -diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h -index fb4365575620..3fc599a817c4 100644 ---- a/xen/include/asm-x86/spec_ctrl.h -+++ b/xen/include/asm-x86/spec_ctrl.h -@@ -34,6 +34,8 @@ - #define SCF_ist_sc_msr (1 << 1) - #define SCF_ist_rsb (1 << 2) - #define SCF_verw (1 << 3) -+#define SCF_ist_ibpb (1 << 4) -+#define SCF_entry_ibpb (1 << 5) - - /* - * The IST paths (NMI/#MC) can interrupt any arbitrary context. Some -@@ -46,13 +48,13 @@ - * These are the controls to inhibit on the S3 resume path until microcode has - * been reloaded. 
- */ --#define SCF_IST_MASK (SCF_ist_sc_msr) -+#define SCF_IST_MASK (SCF_ist_sc_msr | SCF_ist_ibpb) - - /* - * Some speculative protections are per-domain. These settings are merged - * into the top-of-stack block in the context switch path. - */ --#define SCF_DOM_MASK (SCF_verw) -+#define SCF_DOM_MASK (SCF_verw | SCF_entry_ibpb) - - #ifndef __ASSEMBLY__ - -diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h -index 15e24cde00d1..9eb4ad9ab71d 100644 ---- a/xen/include/asm-x86/spec_ctrl_asm.h -+++ b/xen/include/asm-x86/spec_ctrl_asm.h -@@ -88,6 +88,35 @@ - * - SPEC_CTRL_EXIT_TO_{SVM,VMX} - */ - -+.macro DO_SPEC_CTRL_COND_IBPB maybexen:req -+/* -+ * Requires %rsp=regs (also cpuinfo if !maybexen) -+ * Requires %r14=stack_end (if maybexen), %rdx=0 -+ * Clobbers %rax, %rcx, %rdx -+ * -+ * Conditionally issue IBPB if SCF_entry_ibpb is active. In the maybexen -+ * case, we can safely look at UREGS_cs to skip taking the hit when -+ * interrupting Xen. -+ */ -+ .if \maybexen -+ testb $SCF_entry_ibpb, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14) -+ jz .L\@_skip -+ testb $3, UREGS_cs(%rsp) -+ .else -+ testb $SCF_entry_ibpb, CPUINFO_xen_spec_ctrl(%rsp) -+ .endif -+ jz .L\@_skip -+ -+ mov $MSR_PRED_CMD, %ecx -+ mov $PRED_CMD_IBPB, %eax -+ wrmsr -+ jmp .L\@_done -+ -+.L\@_skip: -+ lfence -+.L\@_done: -+.endm -+ - .macro DO_OVERWRITE_RSB tmp=rax - /* - * Requires nothing -@@ -225,12 +254,16 @@ - - /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */ - #define SPEC_CTRL_ENTRY_FROM_PV \ -+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=0), \ -+ X86_FEATURE_IBPB_ENTRY_PV; \ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \ - X86_FEATURE_SC_MSR_PV - - /* Use in interrupt/exception context. May interrupt Xen or PV context. */ - #define SPEC_CTRL_ENTRY_FROM_INTR \ -+ ALTERNATIVE "", __stringify(DO_SPEC_CTRL_COND_IBPB maybexen=1), \ -+ X86_FEATURE_IBPB_ENTRY_PV; \ - ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV; \ - ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \ - X86_FEATURE_SC_MSR_PV -@@ -254,11 +287,23 @@ - * Requires %rsp=regs, %r14=stack_end, %rdx=0 - * Clobbers %rax, %rbx, %rcx, %rdx - * -- * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY -- * maybexen=1, but with conditionals rather than alternatives. -+ * This is logical merge of: -+ * DO_SPEC_CTRL_COND_IBPB maybexen=0 -+ * DO_OVERWRITE_RSB -+ * DO_SPEC_CTRL_ENTRY maybexen=1 -+ * but with conditionals rather than alternatives. - */ - movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx - -+ test $SCF_ist_ibpb, %bl -+ jz .L\@_skip_ibpb -+ -+ mov $MSR_PRED_CMD, %ecx -+ mov $PRED_CMD_IBPB, %eax -+ wrmsr -+ -+.L\@_skip_ibpb: -+ - test $SCF_ist_rsb, %bl - jz .L\@_skip_rsb - diff --git a/xsa407-4.16-6.patch b/xsa407-4.16-6.patch deleted file mode 100644 index 92290dc..0000000 --- a/xsa407-4.16-6.patch +++ /dev/null @@ -1,93 +0,0 @@ -From: Andrew Cooper -Subject: x86/cpuid: Enumeration for BTC_NO - -BTC_NO indicates that hardware is not succeptable to Branch Type Confusion. - -Zen3 CPUs don't suffer BTC. - -This is part of XSA-407. 
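The shape of the fixup, sketched with hypothetical demo_* fields (the real hunk below keys on family 0x19, cpu_has_hypervisor and X86_FEATURE_BTC_NO):

    #include <stdbool.h>

    struct demo_cpuinfo {
        unsigned int family;
        bool hypervisor;  /* running virtualised? */
        bool btc_no;      /* CPUID-enumerated BTC_NO */
    };

    /* Zen3 (Fam 19h) is not susceptible to Branch Type Confusion but
     * predates the BTC_NO allocation, so synthesise the bit on bare
     * metal, where CPUID can be trusted. */
    static void demo_amd_btc_fixup(struct demo_cpuinfo *c)
    {
        if ( c->family == 0x19 && !c->hypervisor && !c->btc_no )
            c->btc_no = true;
    }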
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-
-diff --git a/tools/libs/light/libxl_cpuid.c b/tools/libs/light/libxl_cpuid.c
-index d462f9e421ed..bf6fdee360a9 100644
---- a/tools/libs/light/libxl_cpuid.c
-+++ b/tools/libs/light/libxl_cpuid.c
-@@ -288,6 +288,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
- {"virt-ssbd", 0x80000008, NA, CPUID_REG_EBX, 25, 1},
- {"ssb-no", 0x80000008, NA, CPUID_REG_EBX, 26, 1},
- {"psfd", 0x80000008, NA, CPUID_REG_EBX, 28, 1},
-+ {"btc-no", 0x80000008, NA, CPUID_REG_EBX, 29, 1},
-
- {"nc", 0x80000008, NA, CPUID_REG_ECX, 0, 8},
- {"apicidsize", 0x80000008, NA, CPUID_REG_ECX, 12, 4},
-diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
-index bc7dcf55757a..fe22f5f5b68b 100644
---- a/tools/misc/xen-cpuid.c
-+++ b/tools/misc/xen-cpuid.c
-@@ -158,7 +158,7 @@ static const char *const str_e8b[32] =
- /* [22] */ [23] = "ppin",
- [24] = "amd-ssbd", [25] = "virt-ssbd",
- [26] = "ssb-no",
-- [28] = "psfd",
-+ [28] = "psfd", [29] = "btc-no",
- };
-
- static const char *const str_7d0[32] =
-diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
-index b3b9a0df5fed..b158e3acb5c7 100644
---- a/xen/arch/x86/cpu/amd.c
-+++ b/xen/arch/x86/cpu/amd.c
-@@ -847,6 +847,16 @@ static void init_amd(struct cpuinfo_x86 *c)
- warning_add(text);
- }
- break;
-+
-+ case 0x19:
-+ /*
-+ * Zen3 (Fam19h model < 0x10) parts are not susceptible to
-+ * Branch Type Confusion, but predate the allocation of the
-+ * BTC_NO bit. Fill it back in if we're not virtualised.
-+ */
-+ if (!cpu_has_hypervisor && !cpu_has(c, X86_FEATURE_BTC_NO))
-+ __set_bit(X86_FEATURE_BTC_NO, c->x86_capability);
-+ break;
- }
-
- display_cacheinfo(c);
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index f4ae36eae2d0..0f101c057f3e 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -388,7 +388,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
- * Hardware read-only information, stating immunity to certain issues, or
- * suggestions of which mitigation to use.
- */
-- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
-+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
- (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "",
- (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "",
- (caps & ARCH_CAPS_RSBA) ? " RSBA" : "",
-@@ -403,7 +403,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
- (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "",
- (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "",
- (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "",
-- (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : "");
-+ (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : "",
-+ (e8b & cpufeat_mask(X86_FEATURE_BTC_NO)) ? " BTC_NO" : "");
-
- /* Hardware features which need driving to mitigate issues. */
- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n",
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index 743b857dcd5c..e7b8167800a2 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -266,6 +266,7 @@ XEN_CPUFEATURE(AMD_SSBD, 8*32+24) /*S MSR_SPEC_CTRL.SSBD available */
- XEN_CPUFEATURE(VIRT_SSBD, 8*32+25) /* MSR_VIRT_SPEC_CTRL.SSBD */
- XEN_CPUFEATURE(SSB_NO, 8*32+26) /*A Hardware not vulnerable to SSB */
- XEN_CPUFEATURE(PSFD, 8*32+28) /*S MSR_SPEC_CTRL.PSFD */
-+XEN_CPUFEATURE(BTC_NO, 8*32+29) /*A Hardware not vulnerable to Branch Type Confusion */
-
- /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */
- XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */
diff --git a/xsa407-4.16-7.patch b/xsa407-4.16-7.patch
deleted file mode 100644
index b71a0f8..0000000
--- a/xsa407-4.16-7.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From: Andrew Cooper
-Subject: x86/spec-ctrl: Enable Zen2 chickenbit
-
-... as instructed in the Branch Type Confusion whitepaper.
-
-This is part of XSA-407.
-
-Signed-off-by: Andrew Cooper
-
-diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
-index b158e3acb5c7..37ac84ddd74d 100644
---- a/xen/arch/x86/cpu/amd.c
-+++ b/xen/arch/x86/cpu/amd.c
-@@ -731,6 +731,31 @@ void amd_init_ssbd(const struct cpuinfo_x86 *c)
- printk_once(XENLOG_ERR "No SSBD controls available\n");
- }
-
-+/*
-+ * On Zen2 we offer this chicken (bit) on the altar of Speculation.
-+ *
-+ * Refer to the AMD Branch Type Confusion whitepaper:
-+ * https://XXX
-+ *
-+ * Setting this unnamed bit supposedly causes prediction information on
-+ * non-branch instructions to be ignored. It is to be set unilaterally in
-+ * newer microcode.
-+ *
-+ * This chickenbit is something unrelated on Zen1, and Zen1 vs Zen2 isn't a
-+ * simple model number comparison, so use STIBP as a heuristic to separate the
-+ * two uarches in Fam17h(AMD)/18h(Hygon).
-+ */
-+void amd_init_spectral_chicken(void)
-+{
-+ uint64_t val, chickenbit = 1 << 1;
-+
-+ if (cpu_has_hypervisor || !boot_cpu_has(X86_FEATURE_AMD_STIBP))
-+ return;
-+
-+ if (rdmsr_safe(MSR_AMD64_DE_CFG2, val) == 0 && !(val & chickenbit))
-+ wrmsr_safe(MSR_AMD64_DE_CFG2, val | chickenbit);
-+}
-+
- void __init detect_zen2_null_seg_behaviour(void)
- {
- uint64_t base;
-@@ -796,6 +821,9 @@ static void init_amd(struct cpuinfo_x86 *c)
-
- amd_init_ssbd(c);
-
-+ if (c->x86 == 0x17)
-+ amd_init_spectral_chicken();
-+
- /* Probe for NSCB on Zen2 CPUs when not virtualised */
- if (!cpu_has_hypervisor && !cpu_has_nscb && c == &boot_cpu_data &&
- c->x86 == 0x17)
-diff --git a/xen/arch/x86/cpu/cpu.h b/xen/arch/x86/cpu/cpu.h
-index b593bd85f04f..145bc5156a86 100644
---- a/xen/arch/x86/cpu/cpu.h
-+++ b/xen/arch/x86/cpu/cpu.h
-@@ -22,4 +22,5 @@ void early_init_amd(struct cpuinfo_x86 *c);
- void amd_log_freq(const struct cpuinfo_x86 *c);
- void amd_init_lfence(struct cpuinfo_x86 *c);
- void amd_init_ssbd(const struct cpuinfo_x86 *c);
-+void amd_init_spectral_chicken(void);
- void detect_zen2_null_seg_behaviour(void);
-diff --git a/xen/arch/x86/cpu/hygon.c b/xen/arch/x86/cpu/hygon.c
-index cdc94130dd2e..6f8d491297e8 100644
---- a/xen/arch/x86/cpu/hygon.c
-+++ b/xen/arch/x86/cpu/hygon.c
-@@ -41,6 +41,12 @@ static void init_hygon(struct cpuinfo_x86 *c)
- detect_zen2_null_seg_behaviour();
-
- /*
-+ * TODO: Check heuristic safety with Hygon first
-+ if (c->x86 == 0x18)
-+ amd_init_spectral_chicken();
-+ */
-+
-+ /*
- * Hygon CPUs before Zen2 don't clear segment bases/limits when
- * loading a NULL selector.
- */
-diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
-index 72bc32ba04ff..d3735e499e0f 100644
---- a/xen/include/asm-x86/msr-index.h
-+++ b/xen/include/asm-x86/msr-index.h
-@@ -361,6 +361,7 @@
- #define MSR_AMD64_DE_CFG 0xc0011029
- #define AMD64_DE_CFG_LFENCE_SERIALISE (_AC(1, ULL) << 1)
- #define MSR_AMD64_EX_CFG 0xc001102c
-+#define MSR_AMD64_DE_CFG2 0xc00110e3
-
- #define MSR_AMD64_DR0_ADDRESS_MASK 0xc0011027
- #define MSR_AMD64_DR1_ADDRESS_MASK 0xc0011019
diff --git a/xsa407-4.16-8.patch b/xsa407-4.16-8.patch
deleted file mode 100644
index 8d53169..0000000
--- a/xsa407-4.16-8.patch
+++ /dev/null
@@ -1,293 +0,0 @@
-From: Andrew Cooper
-Subject: x86/spec-ctrl: Mitigate Branch Type Confusion when possible
-
-Branch Type Confusion affects AMD/Hygon CPUs on Zen2 and earlier. To
-mitigate, we require SMT safety (STIBP on Zen2, no-SMT on Zen1), and to issue
-an IBPB on each entry to Xen, to flush the BTB.
-
-Due to performance concerns, dom0 (which is trusted in most configurations) is
-excluded from protections by default.
-
-Therefore:
- * Use STIBP by default on Zen2 too, which now means we want it on by default
- on all hardware supporting STIBP.
- * Break the current IBPB logic out into a new function, extending it with
- IBPB-at-entry logic.
- * Change the existing IBPB-at-ctxt-switch boolean to be tristate, and disable
- it by default when IBPB-at-entry is providing sufficient safety.
-
-If all PV guests on the system are trusted, then it is recommended to boot
-with `spec-ctrl=ibpb-entry=no-pv`, as this will provide an additional marginal
-perf improvement.
-
-This is part of XSA-407 / CVE-2022-23825.
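
Illustration only, not part of the deleted patch: the mitigation described
above hinges on one primitive, an IBPB, issued by writing PRED_CMD.IBPB. A
minimal C sketch of the wrmsr which the entry paths perform -- the MSR index
0x49 and bit 0 are architectural, and this can only execute at ring 0:

    #include <stdint.h>

    #define MSR_PRED_CMD  0x00000049   /* architectural MSR index */
    #define PRED_CMD_IBPB (1u << 0)    /* Indirect Branch Prediction Barrier */

    static inline void wrmsr64(uint32_t msr, uint64_t val)
    {
        /* wrmsr takes the MSR index in %ecx and the value in %edx:%eax. */
        __asm__ volatile ( "wrmsr" ::
                           "c" (msr),
                           "a" ((uint32_t)val),
                           "d" ((uint32_t)(val >> 32))
                           : "memory" );
    }

    static inline void issue_ibpb(void)
    {
        /* Flush branch predictions accumulated before this point. */
        wrmsr64(MSR_PRED_CMD, PRED_CMD_IBPB);
    }

The cost of this write on every hypervisor entry is why the patch makes dom0's
protection opt-in and why the ctxt-switch IBPB becomes redundant when both
guest types are already covered at entry.
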
-
-Signed-off-by: Andrew Cooper
-Reviewed-by: Jan Beulich
-
-diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
-index 1bbdb55129cc..bd6826d0ae05 100644
---- a/docs/misc/xen-command-line.pandoc
-+++ b/docs/misc/xen-command-line.pandoc
-@@ -2234,7 +2234,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
-
- ### spec-ctrl (x86)
- > `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>,
--> {msr-sc,rsb,md-clear}=<bool>|{pv,hvm}=<bool>,
-+> {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>,
-> bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd,
-> eager-fpu,l1d-flush,branch-harden,srb-lock,
-> unpriv-mmio}=<bool> ]`
-@@ -2259,9 +2259,10 @@ in place for guests to use.
-
- Use of a positive boolean value for either of these options is invalid.
-
--The `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` options offer fine
--grained control over the primitives by Xen. These impact Xen's ability to
--protect itself, and/or Xen's ability to virtualise support for guests to use.
-+The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options
-+offer fine grained control over the primitives by Xen. These impact Xen's
-+ability to protect itself, and/or Xen's ability to virtualise support for
-+guests to use.
-
- * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests
- respectively.
-@@ -2280,6 +2281,11 @@ protect itself, and/or Xen's ability to virtualise support for guests to use.
- compatibility with development versions of this fix, `mds=` is also accepted
- on Xen 4.12 and earlier as an alias. Consult vendor documentation in
- preference to here.*
-+* `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction
-+ Barrier) is used on entry to Xen. This is used by default on hardware
-+ vulnerable to Branch Type Confusion, but for performance reasons, dom0 is
-+ unprotected by default. If it is necessary to protect dom0 too, boot with
-+ `spec-ctrl=ibpb-entry`.
-
- If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to
- select which of the thunks gets patched into the `__x86_indirect_thunk_%reg`
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 0f101c057f3e..1d9796c34d71 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -39,6 +39,10 @@ static bool __initdata opt_rsb_hvm = true;
- static int8_t __read_mostly opt_md_clear_pv = -1;
- static int8_t __read_mostly opt_md_clear_hvm = -1;
-
-+static int8_t __read_mostly opt_ibpb_entry_pv = -1;
-+static int8_t __read_mostly opt_ibpb_entry_hvm = -1;
-+static bool __read_mostly opt_ibpb_entry_dom0;
-+
- /* Cmdline controls for Xen's speculative settings. */
- static enum ind_thunk {
- THUNK_DEFAULT, /* Decide which thunk to use at boot time. */
-@@ -54,7 +58,7 @@ int8_t __initdata opt_stibp = -1;
- bool __read_mostly opt_ssbd;
- int8_t __initdata opt_psfd = -1;
-
--bool __read_mostly opt_ibpb_ctxt_switch = true;
-+int8_t __read_mostly opt_ibpb_ctxt_switch = -1;
- int8_t __read_mostly opt_eager_fpu = -1;
- int8_t __read_mostly opt_l1d_flush = -1;
- static bool __initdata opt_branch_harden = true;
-@@ -114,6 +118,9 @@ static int __init parse_spec_ctrl(const char *s)
- opt_rsb_hvm = false;
- opt_md_clear_pv = 0;
- opt_md_clear_hvm = 0;
-+ opt_ibpb_entry_pv = 0;
-+ opt_ibpb_entry_hvm = 0;
-+ opt_ibpb_entry_dom0 = false;
-
- opt_thunk = THUNK_JMP;
- opt_ibrs = 0;
-@@ -140,12 +147,14 @@ static int __init parse_spec_ctrl(const char *s)
- opt_msr_sc_pv = val;
- opt_rsb_pv = val;
- opt_md_clear_pv = val;
-+ opt_ibpb_entry_pv = val;
- }
- else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
- {
- opt_msr_sc_hvm = val;
- opt_rsb_hvm = val;
- opt_md_clear_hvm = val;
-+ opt_ibpb_entry_hvm = val;
- }
- else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 )
- {
-@@ -210,6 +219,28 @@ static int __init parse_spec_ctrl(const char *s)
- break;
- }
- }
-+ else if ( (val = parse_boolean("ibpb-entry", s, ss)) != -1 )
-+ {
-+ switch ( val )
-+ {
-+ case 0:
-+ case 1:
-+ opt_ibpb_entry_pv = opt_ibpb_entry_hvm =
-+ opt_ibpb_entry_dom0 = val;
-+ break;
-+
-+ case -2:
-+ s += strlen("ibpb-entry=");
-+ if ( (val = parse_boolean("pv", s, ss)) >= 0 )
-+ opt_ibpb_entry_pv = val;
-+ else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
-+ opt_ibpb_entry_hvm = val;
-+ else
-+ default:
-+ rc = -EINVAL;
-+ break;
-+ }
-+ }
-
- /* Xen's speculative sidechannel mitigation settings. */
- else if ( !strncmp(s, "bti-thunk=", 10) )
-@@ -477,27 +508,31 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
- * mitigation support for guests.
- */
- #ifdef CONFIG_HVM
-- printk(" Support for HVM VMs:%s%s%s%s%s\n",
-+ printk(" Support for HVM VMs:%s%s%s%s%s%s\n",
- (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
- boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ||
- boot_cpu_has(X86_FEATURE_MD_CLEAR) ||
-+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ||
- opt_eager_fpu) ? "" : " None",
- boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "",
- boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "",
- opt_eager_fpu ? " EAGER_FPU" : "",
-- boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "");
-+ boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "",
-+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "");
-
- #endif
- #ifdef CONFIG_PV
-- printk(" Support for PV VMs:%s%s%s%s%s\n",
-+ printk(" Support for PV VMs:%s%s%s%s%s%s\n",
- (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
- boot_cpu_has(X86_FEATURE_SC_RSB_PV) ||
- boot_cpu_has(X86_FEATURE_MD_CLEAR) ||
-+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ||
- opt_eager_fpu) ? "" : " None",
- boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "",
- boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "",
- opt_eager_fpu ? " EAGER_FPU" : "",
-- boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "");
-+ boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : "",
-+ boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "");
-
- printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n",
- opt_xpti_hwdom ? "enabled" : "disabled",
-@@ -759,6 +794,55 @@ static bool __init should_use_eager_fpu(void)
- }
- }
-
-+static void __init ibpb_calculations(void)
-+{
-+ /* Check we have hardware IBPB support before using it... */
-+ if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) )
-+ {
-+ opt_ibpb_entry_hvm = opt_ibpb_entry_pv = opt_ibpb_ctxt_switch = 0;
-+ opt_ibpb_entry_dom0 = false;
-+ return;
-+ }
-+
-+ /*
-+ * IBPB-on-entry mitigations for Branch Type Confusion.
-+ *
-+ * IBPB && !BTC_NO selects all AMD/Hygon hardware, not known to be safe,
-+ * that we can provide some form of mitigation on.
-+ */
-+ if ( opt_ibpb_entry_pv == -1 )
-+ opt_ibpb_entry_pv = (IS_ENABLED(CONFIG_PV) &&
-+ boot_cpu_has(X86_FEATURE_IBPB) &&
-+ !boot_cpu_has(X86_FEATURE_BTC_NO));
-+ if ( opt_ibpb_entry_hvm == -1 )
-+ opt_ibpb_entry_hvm = (IS_ENABLED(CONFIG_HVM) &&
-+ boot_cpu_has(X86_FEATURE_IBPB) &&
-+ !boot_cpu_has(X86_FEATURE_BTC_NO));
-+
-+ if ( opt_ibpb_entry_pv )
-+ {
-+ setup_force_cpu_cap(X86_FEATURE_IBPB_ENTRY_PV);
-+
-+ /*
-+ * We only need to flush in IST context if we're protecting against PV
-+ * guests. HVM IBPB-on-entry protections are both atomic with
-+ * NMI/#MC, so can't interrupt Xen ahead of having already flushed the
-+ * BTB.
-+ */
-+ default_spec_ctrl_flags |= SCF_ist_ibpb;
-+ }
-+ if ( opt_ibpb_entry_hvm )
-+ setup_force_cpu_cap(X86_FEATURE_IBPB_ENTRY_HVM);
-+
-+ /*
-+ * If we're using IBPB-on-entry to protect against PV and HVM guests
-+ * (ignoring dom0 if trusted), then there's no need to also issue IBPB on
-+ * context switch too.
-+ */
-+ if ( opt_ibpb_ctxt_switch == -1 )
-+ opt_ibpb_ctxt_switch = !(opt_ibpb_entry_hvm && opt_ibpb_entry_pv);
-+}
-+
- /* Calculate whether this CPU is vulnerable to L1TF. */
- static __init void l1tf_calculations(uint64_t caps)
- {
-@@ -1014,8 +1098,12 @@ void spec_ctrl_init_domain(struct domain *d)
- bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) ||
- (opt_fb_clear_mmio && is_iommu_enabled(d)));
-
-+ bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) &&
-+ (d->domain_id != 0 || opt_ibpb_entry_dom0));
-+
- d->arch.spec_ctrl_flags =
- (verw ? SCF_verw : 0) |
-+ (ibpb ? SCF_entry_ibpb : 0) |
- 0;
- }
-
-@@ -1162,12 +1250,15 @@ void __init init_speculation_mitigations(void)
- }
-
- /*
-- * Use STIBP by default if the hardware hint is set. Otherwise, leave it
-- * off as it a severe performance pentalty on pre-eIBRS Intel hardware
-- * where it was retrofitted in microcode.
-+ * Use STIBP by default on all AMD systems. Zen3 and later enumerate
-+ * STIBP_ALWAYS, but STIBP is needed on Zen2 as part of the mitigations
-+ * for Branch Type Confusion.
-+ *
-+ * Leave STIBP off by default on Intel. Pre-eIBRS systems suffer a
-+ * substantial perf hit when it was implemented in microcode.
- */
- if ( opt_stibp == -1 )
-- opt_stibp = !!boot_cpu_has(X86_FEATURE_STIBP_ALWAYS);
-+ opt_stibp = !!boot_cpu_has(X86_FEATURE_AMD_STIBP);
-
- if ( opt_stibp && (boot_cpu_has(X86_FEATURE_STIBP) ||
- boot_cpu_has(X86_FEATURE_AMD_STIBP)) )
-@@ -1239,9 +1330,7 @@ void __init init_speculation_mitigations(void)
- if ( opt_rsb_hvm )
- setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
-
-- /* Check we have hardware IBPB support before using it... */
-- if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) )
-- opt_ibpb_ctxt_switch = false;
-+ ibpb_calculations();
-
- /* Check whether Eager FPU should be enabled by default. */
- if ( opt_eager_fpu == -1 )
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index 3fc599a817c4..9403b81dc7af 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -65,7 +65,7 @@
- void init_speculation_mitigations(void);
- void spec_ctrl_init_domain(struct domain *d);
-
--extern bool opt_ibpb_ctxt_switch;
-+extern int8_t opt_ibpb_ctxt_switch;
- extern bool opt_ssbd;
- extern int8_t opt_eager_fpu;
- extern int8_t opt_l1d_flush;
diff --git a/xsa408.patch b/xsa408.patch
deleted file mode 100644
index c58193f..0000000
--- a/xsa408.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From: Jan Beulich
-Subject: x86/mm: correct TLB flush condition in _get_page_type()
-
-When this logic was moved, it was moved across the point where nx is
-updated to hold the new type for the page. IOW originally it was
-equivalent to using x (and perhaps x would better have been used), but
-now it isn't anymore. Switch to using x, which then brings things in
-line again with the slightly earlier comment there (now) talking about
-transitions _from_ writable.
-
-I have to confess though that I cannot make a direct connection between
-the reported observed behavior of guests leaving several pages around
-with pending general references and the change here. Repeated testing,
-nevertheless, confirms the reported issue is no longer there.
-
-This is CVE-2022-33745 / XSA-408.
-
-Reported-by: Charles Arnold
-Fixes: 8cc5036bc385 ("x86/pv: Fix ABAC cmpxchg() race in _get_page_type()")
-Signed-off-by: Jan Beulich
-Reviewed-by: Andrew Cooper
----
-I'd be happy to update the description to actually connect things, as
-long as someone can give some plausible explanation.
-
---- a/xen/arch/x86/mm.c
-+++ b/xen/arch/x86/mm.c
-@@ -3038,7 +3038,7 @@ static int _get_page_type(struct page_in
- if ( unlikely(!cpumask_empty(mask)) &&
- /* Shadow mode: track only writable pages. */
- (!shadow_mode_enabled(d) ||
-- ((nx & PGT_type_mask) == PGT_writable_page)) )
-+ ((x & PGT_type_mask) == PGT_writable_page)) )
- {
- perfc_incr(need_flush_tlb_flush);
- /*