struct syscall_metadata *entry;
int i, syscall, val, len;
unsigned char *ptr;
+ int offset = 0;
trace = (typeof(trace))ent;
syscall = trace->nr;
continue;
/* This arg points to a user space string */
- ptr = (void *)trace->args + sizeof(long) * entry->nb_args;
+ ptr = (void *)trace->args + sizeof(long) * entry->nb_args + offset;
val = *(int *)ptr;
/* The value is a dynamic string (len << 16 | offset) */
ptr = (void *)ent + (val & 0xffff);
len = val >> 16;
+ offset += 4;
if (entry->user_arg_size < 0 || entry->user_arg_is_str) {
trace_seq_printf(s, " \"%.*s\"", len, ptr);
unsigned long mask;
char *arg;
int offset = offsetof(typeof(trace), args);
- int idx;
int ret = 0;
int len;
int i;
return ret;
mask = meta->user_mask;
- idx = ffs(mask) - 1;
- /*
- * User space data is faulted into a temporary buffer and then
- * added as a dynamic string or array to the end of the event.
- * The user space data name for the arg pointer is "__<arg>_val".
- */
- len = strlen(meta->args[idx]) + sizeof("___val");
- arg = kmalloc(len, GFP_KERNEL);
- if (WARN_ON_ONCE(!arg)) {
- meta->user_mask = 0;
- return -ENOMEM;
- }
+ while (mask) {
+ int idx = ffs(mask) - 1;
+ mask &= ~BIT(idx);
+
+ /*
+ * User space data is faulted into a temporary buffer and then
+ * added as a dynamic string or array to the end of the event.
+ * The user space data name for the arg pointer is
+ * "__<arg>_val".
+ */
+ len = strlen(meta->args[idx]) + sizeof("___val");
+ arg = kmalloc(len, GFP_KERNEL);
+ if (WARN_ON_ONCE(!arg)) {
+ meta->user_mask = 0;
+ return -ENOMEM;
+ }
- snprintf(arg, len, "__%s_val", meta->args[idx]);
+ snprintf(arg, len, "__%s_val", meta->args[idx]);
- ret = trace_define_field(call, "__data_loc char[]",
- arg, offset, sizeof(int), 0,
- FILTER_OTHER);
- if (ret)
- kfree(arg);
+ ret = trace_define_field(call, "__data_loc char[]",
+ arg, offset, sizeof(int), 0,
+ FILTER_OTHER);
+ if (ret) {
+ kfree(arg);
+ break;
+ }
+ offset += 4;
+ }
return ret;
}
+/*
+ * Create a per CPU temporary buffer to copy user space pointers into.
+ *
+ * SYSCALL_FAULT_BUF_SZ holds the size of the per CPU buffer to use
+ * to copy memory from user space addresses into.
+ *
+ * SYSCALL_FAULT_ARG_SZ is the amount to copy from user space.
+ *
+ * SYSCALL_FAULT_USER_MAX is the amount to copy into the ring buffer.
+ * It's slightly smaller than SYSCALL_FAULT_ARG_SZ to know if it
+ * needs to append the EXTRA or not.
+ *
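+ * SYSCALL_FAULT_MAX_CNT is the maximum number of user space pointer
+ * arguments that can be read from a single system call. Each one gets
+ * its own SYSCALL_FAULT_ARG_SZ sized slot of the per CPU buffer.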
+ */
#define SYSCALL_FAULT_BUF_SZ 512
+#define SYSCALL_FAULT_ARG_SZ 168
+#define SYSCALL_FAULT_USER_MAX 128
+#define SYSCALL_FAULT_MAX_CNT 3
/* Use the tracing per CPU buffer infrastructure to copy from user space */
struct syscall_user_buffer {
call_rcu_tasks_trace(&sbuf->rcu, rcu_free_syscall_buffer);
}
+struct syscall_args {
+ char *ptr_array[SYSCALL_FAULT_MAX_CNT]; /* user space addresses to read */
+ int read[SYSCALL_FAULT_MAX_CNT]; /* per address result, negative on failure */
+ int uargs; /* number of ptr_array[] entries in use */
+};
+
static int syscall_copy_user(char *buf, const char __user *ptr,
size_t size, void *data)
{
- unsigned long *ret_size = data;
+ struct syscall_args *args = data; /* addresses to read and where to report results */
+ int ret;
+
+ for (int i = 0; i < args->uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) { /* one slot of buf per address */
+ ptr = (char __user *)args->ptr_array[i]; /* the passed in ptr is not used */
+ ret = strncpy_from_user(buf, ptr, size);
+ args->read[i] = ret; /* stored as is, including a negative error value */
+ }
+ return 0; /* failures are only reported per address in args->read[] */
+}
+
+static int syscall_copy_user_array(char *buf, const char __user *ptr,
+ size_t size, void *data)
+{
+ struct syscall_args *args = data; /* addresses to read and where to report results */
int ret;
- ret = strncpy_from_user(buf, ptr, size);
- if (ret < 0)
- return 1;
- *ret_size = ret;
+ for (int i = 0; i < args->uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) { /* one slot of buf per address */
+ ptr = (char __user *)args->ptr_array[i]; /* the passed in ptr is not used */
+ ret = __copy_from_user(buf, ptr, size); /* fixed size copy, not NUL terminated string handling */
+ args->read[i] = ret ? -1 : size; /* all or nothing: any nonzero return is recorded as -1 */
+ }
return 0;
}
static char *sys_fault_user(struct syscall_metadata *sys_data,
struct syscall_user_buffer *sbuf,
- unsigned long *args, unsigned int *data_size)
+ unsigned long *args,
+ unsigned int data_size[SYSCALL_FAULT_MAX_CNT])
{
trace_user_buf_copy syscall_copy = syscall_copy_user;
- unsigned long size = SYSCALL_FAULT_BUF_SZ - 1;
unsigned long mask = sys_data->user_mask;
- int idx = ffs(mask) - 1;
+ unsigned long size = SYSCALL_FAULT_ARG_SZ - 1;
+ struct syscall_args sargs;
bool array = false;
- char *ptr;
+ char *buffer;
char *buf;
+ int ret;
+ int i = 0;
- /* Get the pointer to user space memory to read */
- ptr = (char *)args[idx];
- *data_size = 0;
+ /* The extra is appended to the user data in the buffer */
+ BUILD_BUG_ON(SYSCALL_FAULT_USER_MAX + sizeof(EXTRA) >=
+ SYSCALL_FAULT_ARG_SZ);
/*
* If this system call event has a size argument, use
if (sys_data->user_arg_size >= 0) {
array = true;
size = args[sys_data->user_arg_size];
- if (size > SYSCALL_FAULT_BUF_SZ - 1)
- size = SYSCALL_FAULT_BUF_SZ - 1;
- /* use normal copy_from_user() */
- syscall_copy = NULL;
+ if (size > SYSCALL_FAULT_ARG_SZ - 1)
+ size = SYSCALL_FAULT_ARG_SZ - 1;
+ syscall_copy = syscall_copy_user_array;
}
- buf = trace_user_fault_read(&sbuf->buf, ptr, size,
- syscall_copy, &size);
- if (!buf)
+ while (mask) {
+ int idx = ffs(mask) - 1;
+ mask &= ~BIT(idx);
+
+ if (WARN_ON_ONCE(i == SYSCALL_FAULT_MAX_CNT))
+ break;
+
+ /* Get the pointer to user space memory to read */
+ sargs.ptr_array[i++] = (char *)args[idx];
+ }
+
+ sargs.uargs = i;
+
+ /* Clear the values that are not used */
+ for (; i < SYSCALL_FAULT_MAX_CNT; i++) {
+ data_size[i] = -1; /* Denotes no pointer */
+ }
+
+ buffer = trace_user_fault_read(&sbuf->buf, NULL, size,
+ syscall_copy, &sargs);
+ if (!buffer)
return NULL;
- /* For strings, replace any non-printable characters with '.' */
- if (!array) {
- for (int i = 0; i < size; i++) {
- if (!isprint(buf[i]))
- buf[i] = '.';
- }
+ buf = buffer;
+ for (i = 0; i < sargs.uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
- /*
- * If the text was truncated due to our max limit, add "..." to
- * the string.
- */
- if (size > SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA)) {
- strscpy(buf + SYSCALL_FAULT_BUF_SZ - sizeof(EXTRA),
- EXTRA, sizeof(EXTRA));
- size = SYSCALL_FAULT_BUF_SZ;
+ ret = sargs.read[i];
+ if (ret < 0)
+ continue;
+ buf[ret] = '\0';
+
+ /* For strings, replace any non-printable characters with '.' */
+ if (!array) {
+ for (int x = 0; x < ret; x++) {
+ if (!isprint(buf[x]))
+ buf[x] = '.';
+ }
+
+ /*
+ * If the text was truncated due to our max limit,
+ * add "..." to the string.
+ */
+ if (ret > SYSCALL_FAULT_USER_MAX) {
+ strscpy(buf + SYSCALL_FAULT_USER_MAX, EXTRA,
+ sizeof(EXTRA));
+ ret = SYSCALL_FAULT_USER_MAX + sizeof(EXTRA);
+ } else {
+ buf[ret++] = '\0';
+ }
} else {
- buf[size++] = '\0';
+ ret = min(ret, SYSCALL_FAULT_USER_MAX);
}
+ data_size[i] = ret;
}
- *data_size = size;
- return buf;
+ return buffer;
}
static int
syscall_get_data(struct syscall_metadata *sys_data, unsigned long *args,
- char **buffer, int *size, int *user_sizes, int *uargs)
+ char **buffer, int *size, int *user_sizes, int *uargs)
{
struct syscall_user_buffer *sbuf;
+ int i;
/* If the syscall_buffer is NULL, tracing is being shutdown */
sbuf = READ_ONCE(syscall_buffer);
if (!sbuf)
return -1;
- *buffer = sys_fault_user(sys_data, sbuf, args, user_size);
+ *buffer = sys_fault_user(sys_data, sbuf, args, user_sizes); /* NOTE(review): result is not checked for NULL here */
/*
* user_size is the amount of data to append.
* Need to add 4 for the meta field that points to
* the user memory at the end of the event and also
* stores its size.
*/
- *size = 4 + *user_size;
+ for (i = 0; i < SYSCALL_FAULT_MAX_CNT; i++) {
+ if (user_sizes[i] < 0) /* a negative size marks the first unused slot */
+ break;
+ *size += user_sizes[i] + 4; /* accumulates onto the caller's *size: data plus its meta field */
+ }
+ /* Save the number of user read arguments of this syscall */
+ *uargs = i;
return 0;
}
static void syscall_put_data(struct syscall_metadata *sys_data,
struct syscall_trace_enter *entry,
- char *buffer, int size, int user_size)
+ char *buffer, int size, int *user_sizes, int uargs)
{
+ char *buf = buffer;
void *ptr;
int val;
/*
* The meta data will store the offset of the user data from
- * the beginning of the event.
+ * the beginning of the event. That is after the static arguments
+ * and the meta data fields.
*/
- val = (ptr - (void *)entry) + 4;
+ val = (ptr - (void *)entry) + 4 * uargs;
+
+ for (int i = 0; i < uargs; i++) {
- /* Store the offset and the size into the meta data */
- *(int *)ptr = val | (user_size << 16);
+ if (i)
+ val += user_sizes[i - 1];
- if (WARN_ON_ONCE((ptr - (void *)entry + user_size) > size))
- user_size = 0;
+ /* Store the offset and the size into the meta data */
+ *(int *)ptr = val | (user_sizes[i] << 16);
- /* Nothing to do if the user space was empty or faulted */
- if (user_size) {
- /* Now store the user space data into the event */
+ /* Skip the meta data */
ptr += 4;
- memcpy(ptr, buffer, user_size);
+ }
+
+ for (int i = 0; i < uargs; i++, buf += SYSCALL_FAULT_ARG_SZ) {
+ /* Nothing to do if the user space was empty or faulted */
+ if (!user_sizes[i])
+ continue;
+
+ memcpy(ptr, buf, user_sizes[i]);
+ ptr += user_sizes[i];
}
}
struct trace_event_buffer fbuffer;
unsigned long args[6];
char *user_ptr;
- int user_size = 0;
+ int user_sizes[SYSCALL_FAULT_MAX_CNT] = {};
int syscall_nr;
int size = 0;
+ int uargs = 0;
bool mayfault;
/*
if (mayfault) {
if (syscall_get_data(sys_data, args, &user_ptr,
- &size, &user_size) < 0)
+ &size, user_sizes, &uargs) < 0)
return;
}
memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
if (mayfault)
- syscall_put_data(sys_data, entry, user_ptr, size, user_size);
+ syscall_put_data(sys_data, entry, user_ptr, size, user_sizes, uargs);
trace_event_buffer_commit(&fbuffer);
}
static void check_faultable_syscall(struct trace_event_call *call, int nr)
{
struct syscall_metadata *sys_data = call->data;
+ unsigned long mask;
/* Only work on entry */
if (sys_data->enter_event != call)
case __NR_access:
#endif
case __NR_acct:
- case __NR_add_key: /* Just _type. TODO add _description */
case __NR_chdir:
#ifdef __NR_chown
case __NR_chown:
case __NR_delete_module:
case __NR_execve:
case __NR_fsopen:
- case __NR_getxattr: /* Just pathname, TODO add name */
#ifdef __NR_lchown
case __NR_lchown:
#endif
- case __NR_lgetxattr: /* Just pathname, TODO add name */
- case __NR_lremovexattr: /* Just pathname, TODO add name */
-#ifdef __NR_link
- case __NR_link: /* Just oldname. TODO add newname */
-#endif
- case __NR_listxattr: /* Just pathname, TODO add list */
- case __NR_llistxattr: /* Just pathname, TODO add list */
- case __NR_lsetxattr: /* Just pathname, TODO add list */
#ifdef __NR_open
case __NR_open:
#endif
case __NR_memfd_create:
- case __NR_mount: /* Just dev_name, TODO add dir_name and type */
#ifdef __NR_mkdir
case __NR_mkdir:
#endif
#endif
case __NR_mq_open:
case __NR_mq_unlink:
- case __NR_pivot_root: /* Just new_root, TODO add old_root */
#ifdef __NR_readlink
case __NR_readlink:
#endif
- case __NR_removexattr: /* Just pathname, TODO add name */
-#ifdef __NR_rename
- case __NR_rename: /* Just oldname. TODO add newname */
-#endif
- case __NR_request_key: /* Just _type. TODO add _description */
#ifdef __NR_rmdir
case __NR_rmdir:
#endif
- case __NR_setxattr: /* Just pathname, TODO add list */
case __NR_shmdt:
#ifdef __NR_statfs
case __NR_statfs:
#endif
case __NR_swapon:
case __NR_swapoff:
-#ifdef __NR_symlink
- case __NR_symlink: /* Just oldname. TODO add newname */
-#endif
#ifdef __NR_truncate
case __NR_truncate:
#endif
#ifdef __NR_futimesat
case __NR_futimesat:
#endif
- case __NR_getxattrat: /* Just pathname, TODO add name */
case __NR_inotify_add_watch:
- case __NR_linkat: /* Just oldname. TODO add newname */
- case __NR_listxattrat: /* Just pathname, TODO add list */
case __NR_mkdirat:
case __NR_mknodat:
case __NR_mount_setattr:
- case __NR_move_mount: /* Just from_pathname, TODO add to_pathname */
case __NR_name_to_handle_at:
#ifdef __NR_newfstatat
case __NR_newfstatat:
case __NR_open_tree:
case __NR_open_tree_attr:
case __NR_readlinkat:
-#ifdef __NR_renameat
- case __NR_renameat: /* Just oldname. TODO add newname */
-#endif
- case __NR_renameat2: /* Just oldname. TODO add newname */
- case __NR_removexattrat: /* Just pathname, TODO add name */
case __NR_quotactl:
- case __NR_setxattrat: /* Just pathname, TODO add list */
case __NR_syslog:
- case __NR_symlinkat: /* Just oldname. TODO add newname */
case __NR_statx:
case __NR_unlinkat:
case __NR_utimensat:
case __NR_fanotify_mark:
sys_data->user_mask = BIT(4);
break;
+ /* 2 user args, 0 and 1 */
+ case __NR_add_key:
+ case __NR_getxattr:
+ case __NR_lgetxattr:
+ case __NR_lremovexattr:
+#ifdef __NR_link
+ case __NR_link:
+#endif
+ case __NR_listxattr:
+ case __NR_llistxattr:
+ case __NR_lsetxattr:
+ case __NR_pivot_root:
+ case __NR_removexattr:
+#ifdef __NR_rename
+ case __NR_rename:
+#endif
+ case __NR_request_key:
+ case __NR_setxattr:
+#ifdef __NR_symlink
+ case __NR_symlink:
+#endif
+ sys_data->user_mask = BIT(0) | BIT(1);
+ break;
+ /* 2 user args, 0 and 2 */
+ case __NR_symlinkat:
+ sys_data->user_mask = BIT(0) | BIT(2);
+ break;
+ /* 2 user args, 1 and 3 */
+ case __NR_getxattrat:
+ case __NR_linkat:
+ case __NR_listxattrat:
+ case __NR_move_mount:
+#ifdef __NR_renameat
+ case __NR_renameat:
+#endif
+ case __NR_renameat2:
+ case __NR_removexattrat:
+ case __NR_setxattrat:
+ sys_data->user_mask = BIT(1) | BIT(3);
+ break;
+ case __NR_mount: /* 3 user args: dev_name, dir_name and type */
+ sys_data->user_mask = BIT(0) | BIT(1) | BIT(2);
+ break;
default:
sys_data->user_mask = 0;
+ return;
}
+
+ if (sys_data->user_arg_size < 0)
+ return;
+
+ /*
+ * The user_arg_size can only be used when the system call
+ * is reading only a single address from user space.
+ */
+ mask = sys_data->user_mask;
+ if (WARN_ON(mask & (mask - 1)))
+ sys_data->user_arg_size = -1;
}
static int __init init_syscall_trace(struct trace_event_call *call)
bool valid_prog_array;
bool mayfault;
char *user_ptr;
+ int user_sizes[SYSCALL_FAULT_MAX_CNT] = {};
int syscall_nr;
- int user_size;
int rctx;
int size = 0;
+ int uargs = 0;
/*
* Syscall probe called with preemption enabled, but the ring
if (mayfault) {
if (syscall_get_data(sys_data, args, &user_ptr,
- &size, &user_size) < 0)
+ &size, user_sizes, &uargs) < 0)
return;
}
memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
if (mayfault)
- syscall_put_data(sys_data, rec, user_ptr, size, user_size);
+ syscall_put_data(sys_data, rec, user_ptr, size, user_sizes, uargs);
if ((valid_prog_array &&
!perf_call_bpf_enter(sys_data->enter_event, fake_regs, sys_data, rec)) ||