Blob Blame History Raw
From 89126c7452c29736d38dc072a952b0b0c831fade Mon Sep 17 00:00:00 2001
From: Yonghong Song <yonghong.song@linux.dev>
Date: Mon, 29 Jan 2024 16:13:30 -0800
Subject: [PATCH] Fix ttysnoop.py with newer kernels

Jerome Marchand reported that ttysnoop.py does not work properly
with newer kernels (#4884). I investigated and found that a kernel
data structure change causes a BPF verifier failure.
The verifier log shows the failure:
  ; kvec  = from->kvec;
  // R1=ptr_iov_iter()
  15: (79) r1 = *(u64 *)(r1 +16)        ; R1_w=scalar()
  ; count = kvec->iov_len;
  16: (bf) r2 = r1                      ; R1_w=scalar(id=1) R2_w=scalar(id=1)
  17: (07) r2 += 8                      ; R2_w=scalar()
  18: (05) goto pc+3
  ;
  22: (79) r2 = *(u64 *)(r2 +0)
  R2 invalid mem access 'scalar'

So basically, loading from 'iov_iter + 16' returns a scalar, but the
verifier expects it to be a pointer.

In v6.4, we have
    struct iovec
    {
        void __user *iov_base;  /* BSD uses caddr_t (1003.1g requires void *) */
        __kernel_size_t iov_len; /* Must be size_t (1003.1g) */
    };
    struct iov_iter {
        u8 iter_type;
        bool copy_mc;
        bool nofault;
        bool data_source;
        bool user_backed;
        union {
                size_t iov_offset;
                int last_offset;
        };
        union {
                struct iovec __ubuf_iovec;
                struct {
                        union {
                                const struct iovec *__iov;
                                const struct kvec *kvec;
                                const struct bio_vec *bvec;
                                struct xarray *xarray;
                                struct pipe_inode_info *pipe;
                                void __user *ubuf;
                        };
                        size_t count;
                };
        };
        union {
                unsigned long nr_segs;
                struct {
                        unsigned int head;
                        unsigned int start_head;
                };
                loff_t xarray_start;
        };
    };

The kernel traversal chain will be
   "struct iov_iter" -> "struct iovec __ubuf_iovec" -> "void __user *iov_base".
Since the type of "iov_base" is a pointer to void, the kernel treats the
loaded value as a scalar, which causes the verification failure.

But older kernels such as 5.19 do not have this issue:
    struct iovec
    {
        void __user *iov_base;  /* BSD uses caddr_t (1003.1g requires void *) */
        __kernel_size_t iov_len; /* Must be size_t (1003.1g) */
    };
    struct iov_iter {
        u8 iter_type;
        bool nofault;
        bool data_source;
        bool user_backed;
        size_t iov_offset;
        size_t count;
        union {
                const struct iovec *iov;
                const struct kvec *kvec;
                const struct bio_vec *bvec;
                struct xarray *xarray;
                struct pipe_inode_info *pipe;
                void __user *ubuf;
        };
        union {
                unsigned long nr_segs;
                struct {
                        unsigned int head;
                        unsigned int start_head;
                };
                loff_t xarray_start;
        };
    };

The kernel traversal chain will be
    "struct iov_iter" -> "const struct iovec *iov"
Note that "const struct iovec *iov" is used since it is the *first* member
inside the union. The traversal stops once we hit a pointer.
So the kernel verifier returns a pointer to a 'struct iovec' object
(untrusted, cannot be used as a parameter to a call) and the verifier
can proceed.

To fix the problem, read the kvec fields with bpf_probe_read_kernel()
instead of dereferencing the pointer directly, so that ttysnoop.py
continues to work with newer kernels.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 tools/ttysnoop.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ttysnoop.py b/tools/ttysnoop.py
index 77f97b7c..aca09db4 100755
--- a/tools/ttysnoop.py
+++ b/tools/ttysnoop.py
@@ -162,8 +162,8 @@ PROBE_TTY_WRITE
      */
     case CASE_ITER_IOVEC_NAME:
         kvec  = from->kvec;
-        buf   = kvec->iov_base;
-        count = kvec->iov_len;
+        bpf_probe_read_kernel(&buf, sizeof(buf), &kvec->iov_base);
+        bpf_probe_read_kernel(&count, sizeof(count), &kvec->iov_len);
         break;
     CASE_ITER_UBUF_TEXT
     /* TODO: Support more type */
-- 
2.43.0