07412b4
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
07412b4
From: Stefan Berger <stefanb@linux.ibm.com>
07412b4
Date: Tue, 25 Jul 2023 13:23:10 -0400
07412b4
Subject: [PATCH] kern/ieee1275/init: ppc64: Restrict high memory in presence
07412b4
 of fadump
07412b4
07412b4
When a kernel dump is present then restrict the high memory regions to
07412b4
avoid allocating memory where the kernel dump resides. Use the
07412b4
ibm,kernel-dump node under /rtas to determine whether a kernel dump exists
07412b4
and up to which limit grub can use available memory. Set the
07412b4
upper_mem_limit to the size of the kernel dump section of type
07412b4
'REAL_MODE_REGION' and therefore only allow grub's memory usage for high
07412b4
addresses from RMO_ADDR_MAX to 'upper_mem_limit'. This means that grub can
07412b4
use high memory in the range of RMO_ADDR_MAX (768MB) to upper_mem_limit and
07412b4
the kernel-dump memory regions above 'upper_mem_limit' remain untouched.
07412b4
This change has no effect on memory allocations below 'linux_rmo_save'
07412b4
(typically at 640MB).
07412b4
07412b4
Also, fall back to allocating below linux_rmo_save in case the chunk of
07412b4
memory there would be larger than the chunk of memory above RMO_ADDR_MAX.
07412b4
This can for example occur if a free memory area is found starting at 300MB
07412b4
extending up to 1GB but a kernel dump is located at 768MB, which
07412b4
prevents the allocation of the high memory area and requires using
07412b4
the chunk starting at 300MB to avoid an unnecessary out-of-memory
07412b4
condition.
07412b4
07412b4
Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
07412b4
Reviewed-by: Hari Bathini <hbathini@linux.ibm.com>
07412b4
Cc: Pavithra Prakash <pavrampu@in.ibm.com>
07412b4
Cc: Michael Ellerman <mpe@ellerman.id.au>
07412b4
Cc: Carolyn Scherrer <cpscherr@us.ibm.com>
07412b4
Cc: Mahesh Salgaonkar <mahesh@linux.ibm.com>
07412b4
Cc: Sourabh Jain <sourabhjain@linux.ibm.com>
07412b4
---
07412b4
 grub-core/kern/ieee1275/init.c | 144 ++++++++++++++++++++++++++++++++++++++++-
07412b4
 1 file changed, 142 insertions(+), 2 deletions(-)
07412b4
07412b4
diff --git a/grub-core/kern/ieee1275/init.c b/grub-core/kern/ieee1275/init.c
07412b4
index 3d4ad9d1f162..8e7f742fad46 100644
07412b4
--- a/grub-core/kern/ieee1275/init.c
07412b4
+++ b/grub-core/kern/ieee1275/init.c
07412b4
@@ -17,6 +17,8 @@
07412b4
  *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
07412b4
  */
07412b4
 
07412b4
+#include <stddef.h> /* offsetof() */
07412b4
+
07412b4
 #include <grub/kernel.h>
07412b4
 #include <grub/dl.h>
07412b4
 #include <grub/disk.h>
07412b4
@@ -198,6 +200,96 @@ grub_claim_heap (void)
07412b4
 #else
07412b4
 /* Helpers for mm on powerpc. */
07412b4
 
07412b4
+/* ibm,kernel-dump data structures */
07412b4
+struct kd_section /* one section entry of ibm,kernel-dump; check_kernel_dump() reads fields as big-endian */
07412b4
+{
07412b4
+  grub_uint32_t flags;
07412b4
+  grub_uint16_t src_datatype; /* section type, compared against KD_SRC_DATATYPE_* */
07412b4
+#define KD_SRC_DATATYPE_REAL_MODE_REGION  0x0011
07412b4
+  grub_uint16_t error_flags;
07412b4
+  grub_uint64_t src_address; /* check_kernel_dump() only uses a real-mode region with src_address == 0 */
07412b4
+  grub_uint64_t num_bytes; /* for that real-mode region: becomes the upper memory limit */
07412b4
+  grub_uint64_t act_bytes;
07412b4
+  grub_uint64_t dst_address;
07412b4
+} GRUB_PACKED;
07412b4
+
07412b4
+#define MAX_KD_SECTIONS 10
07412b4
+
07412b4
+struct kernel_dump /* buffer layout for the /rtas "ibm,kernel-dump" property: header plus sections */
07412b4
+{
07412b4
+  grub_uint32_t format; /* check_kernel_dump() only accepts format 1 */
07412b4
+  grub_uint16_t num_sections; /* rejected if larger than MAX_KD_SECTIONS */
07412b4
+  grub_uint16_t status_flags;
07412b4
+  grub_uint32_t offset_1st_section; /* byte offset from the start of this struct */
07412b4
+  grub_uint32_t num_blocks;
07412b4
+  grub_uint64_t start_block;
07412b4
+  grub_uint64_t num_blocks_avail;
07412b4
+  grub_uint32_t offet_path_string; /* NOTE(review): likely a typo for 'offset_path_string'; unused in the visible code */
07412b4
+  grub_uint32_t max_time_allowed;
07412b4
+  struct kd_section kds[MAX_KD_SECTIONS]; /* offset_1st_section should point to kds[0] */
07412b4
+} GRUB_PACKED;
07412b4
+
07412b4
+/*
07412b4
+ * Determine if a kernel dump exists (/rtas property "ibm,kernel-dump") and, if
07412b4
+ * so, lower *highest to the size of its REAL_MODE_REGION section at address 0.
07412b4
+ * The caller must have initialized *highest to rmo_top. *highest will not
07412b4
+ * be modified if no kernel dump (or no such section) is found.
07412b4
+ */
07412b4
+static void
07412b4
+check_kernel_dump (grub_uint64_t *highest)
07412b4
+{
07412b4
+  struct kernel_dump kernel_dump;
07412b4
+  grub_ssize_t kernel_dump_size;
07412b4
+  grub_ieee1275_phandle_t rtas;
07412b4
+  struct kd_section *kds;
07412b4
+  grub_size_t i;
07412b4
+
07412b4
+  /* If there's a kernel-dump it must have at least one complete section */
07412b4
+  if (grub_ieee1275_finddevice ("/rtas", &rtas) ||
07412b4
+      grub_ieee1275_get_property (rtas, "ibm,kernel-dump", &kernel_dump,
07412b4
+                                  sizeof (kernel_dump), &kernel_dump_size) ||
07412b4
+      kernel_dump_size < (grub_ssize_t) offsetof (struct kernel_dump, kds[1]))
07412b4
+    return;
07412b4
+  /* never trust more bytes than fit into our local buffer */
07412b4
+  kernel_dump_size = grub_min (kernel_dump_size, (grub_ssize_t) sizeof (kernel_dump));
07412b4
+
07412b4
+  if (grub_be_to_cpu32 (kernel_dump.format) != 1)
07412b4
+    {
07412b4
+      grub_printf (_("Error: ibm,kernel-dump has an unexpected format version '%u'\n"),
07412b4
+                   grub_be_to_cpu32 (kernel_dump.format));
07412b4
+      return;
07412b4
+    }
07412b4
+
07412b4
+  if (grub_be_to_cpu16 (kernel_dump.num_sections) > MAX_KD_SECTIONS)
07412b4
+    {
07412b4
+      grub_printf (_("Error: Too many kernel dump sections: %d\n"),
07412b4
+                   grub_be_to_cpu16 (kernel_dump.num_sections));
07412b4
+      return;
07412b4
+    }
07412b4
+
07412b4
+  for (i = 0; i < grub_be_to_cpu16 (kernel_dump.num_sections); i++)
07412b4
+    {
07412b4
+      grub_uint64_t kds_off = (grub_uint64_t) grub_be_to_cpu32 (kernel_dump.offset_1st_section) +
07412b4
+                              (grub_uint64_t) i * sizeof (struct kd_section);
07412b4
+      /* the complete section must lie within the bytes the firmware returned */
07412b4
+      if (kds_off + sizeof (*kds) > (grub_uint64_t) kernel_dump_size)
07412b4
+        {
07412b4
+          grub_printf (_("Error: 'kds' address beyond last available section\n"));
07412b4
+          return;
07412b4
+        }
07412b4
+      kds = (struct kd_section *) ((grub_addr_t) &kernel_dump + (grub_addr_t) kds_off);
07412b4
+
07412b4
+      if ((grub_be_to_cpu16 (kds->src_datatype) == KD_SRC_DATATYPE_REAL_MODE_REGION) &&
07412b4
+          (grub_be_to_cpu64 (kds->src_address) == 0))
07412b4
+        {
07412b4
+          *highest = grub_min (*highest, grub_be_to_cpu64 (kds->num_bytes));
07412b4
+          break;
07412b4
+        }
07412b4
+    }
07412b4
+
07412b4
+  return;
07412b4
+}
07412b4
+
07412b4
 /*
07412b4
  * How much memory does OF believe exists in total?
07412b4
  *
07412b4
@@ -277,10 +369,31 @@ regions_claim (grub_uint64_t addr, grub_uint64_t len, grub_memory_type_t type,
07412b4
    *
07412b4
    * Finally, we also want to make sure that when grub loads the kernel,
07412b4
    * it isn't going to use up all the memory we're trying to reserve! So
07412b4
-   * enforce our entire RUNTIME_MIN_SPACE here:
07412b4
+   * enforce our entire RUNTIME_MIN_SPACE here (no fadump):
07412b4
+   *
07412b4
+   * | Top of memory == upper_mem_limit -|
07412b4
+   * |                                   |
07412b4
+   * |             available             |
07412b4
+   * |                                   |
07412b4
+   * |----------     768 MB    ----------|
07412b4
+   * |                                   |
07412b4
+   * |              reserved             |
07412b4
+   * |                                   |
07412b4
+   * |--- 768 MB - runtime min space  ---|
07412b4
+   * |                                   |
07412b4
+   * |             available             |
07412b4
+   * |                                   |
07412b4
+   * |----------      0 MB     ----------|
07412b4
+   *
07412b4
+   * In case fadump is used, we allow the following:
07412b4
    *
07412b4
    * |---------- Top of memory ----------|
07412b4
    * |                                   |
07412b4
+   * |             unavailable           |
07412b4
+   * |         (kernel dump area)        |
07412b4
+   * |                                   |
07412b4
+   * |--------- upper_mem_limit ---------|
07412b4
+   * |                                   |
07412b4
    * |             available             |
07412b4
    * |                                   |
07412b4
    * |----------     768 MB    ----------|
07412b4
@@ -335,17 +448,44 @@ regions_claim (grub_uint64_t addr, grub_uint64_t len, grub_memory_type_t type,
07412b4
         }
07412b4
       else
07412b4
         {
07412b4
+          grub_uint64_t upper_mem_limit = rmo_top;
07412b4
+          grub_uint64_t orig_addr = addr;
07412b4
+
07412b4
+          check_kernel_dump (&upper_mem_limit);
07412b4
+
07412b4
           /*
07412b4
            * we order these cases to prefer higher addresses and avoid some
07412b4
            * splitting issues
07412b4
+           * The following shows the order of variables:
07412b4
+           *  no   kernel dump: linux_rmo_save < RMO_ADDR_MAX <= upper_mem_limit == rmo_top
07412b4
+           *  with kernel dump: linux_rmo_save < RMO_ADDR_MAX <= upper_mem_limit <= rmo_top
07412b4
            */
07412b4
-          if (addr < RMO_ADDR_MAX && (addr + len) > RMO_ADDR_MAX)
07412b4
+          if (addr < RMO_ADDR_MAX && (addr + len) > RMO_ADDR_MAX && upper_mem_limit >= RMO_ADDR_MAX)
07412b4
             {
07412b4
               grub_dprintf ("ieee1275",
07412b4
                             "adjusting region for RUNTIME_MIN_SPACE: (%llx -> %llx) -> (%llx -> %llx)\n",
07412b4
                             addr, addr + len, RMO_ADDR_MAX, addr + len);
07412b4
               len = (addr + len) - RMO_ADDR_MAX;
07412b4
               addr = RMO_ADDR_MAX;
07412b4
+
07412b4
+              /* We must not exceed the upper_mem_limit (assuming it's >= RMO_ADDR_MAX) */
07412b4
+              if (addr + len > upper_mem_limit)
07412b4
+                {
07412b4
+                  /* take the bigger chunk from either below linux_rmo_save or between RMO_ADDR_MAX and upper_mem_limit */
07412b4
+                  len = upper_mem_limit - addr;
07412b4
+                  if (orig_addr < linux_rmo_save && linux_rmo_save - orig_addr > len)
07412b4
+                    {
07412b4
+                      /* lower part is bigger */
07412b4
+                      addr = orig_addr;
07412b4
+                      len = linux_rmo_save - addr;
07412b4
+                    }
07412b4
+
07412b4
+                  grub_dprintf ("ieee1275", "re-adjusted region to: (%llx -> %llx)\n",
07412b4
+                                addr, addr + len);
07412b4
+
07412b4
+                  if (len == 0)
07412b4
+                    return 0;
07412b4
+                }
07412b4
             }
07412b4
           else if ((addr < linux_rmo_save) && ((addr + len) > linux_rmo_save))
07412b4
             {