integ/kernel/kernel-rt/centos/patches/Notification-of-death-of-ar...

537 lines
16 KiB
Diff

From 220edc1ccc6a0bc3dfb94a92946bf2b9a6cc0c61 Mon Sep 17 00:00:00 2001
From: Chris Friesen <chris.friesen@windriver.com>
Date: Thu, 7 Apr 2016 11:16:19 -0600
Subject: [PATCH] Notification of death of arbitrary processes
Note: this commit was copied from Titanium Cloud Rel2
This exposes a new feature which may be called to request
notification when an arbitrary process changes state. The
caller specifies a pid, signal number, and event mask, and
when that pid dies, or is stopped, or anything else that
would normally cause a SIGCHLD, the kernel will send the
specified signal to the caller if the event is in the event
mask originally passed down. The siginfo_t struct will
contain the same information as would be included with SIGCHLD.
This is exposed to userspace via the prctl() call with the
PR_DO_NOTIFY_TASK_STATE option.
Signed-off-by: Jim Somerville <Jim.Somerville@windriver.com>
---
include/linux/init_task.h | 9 ++
include/linux/sched.h | 6 ++
include/uapi/linux/prctl.h | 18 ++++
init/Kconfig | 15 +++
kernel/Makefile | 1 +
kernel/death_notify.c | 227 +++++++++++++++++++++++++++++++++++++++++++++
kernel/death_notify.h | 45 +++++++++
kernel/exit.c | 6 ++
kernel/fork.c | 4 +
kernel/signal.c | 11 +++
kernel/sys.c | 9 ++
11 files changed, 351 insertions(+)
create mode 100644 kernel/death_notify.c
create mode 100644 kernel/death_notify.h
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index d8c82e0..ba0c12e 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -77,6 +77,14 @@ extern struct nsproxy init_nsproxy;
.signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(sighand.signalfd_wqh), \
}
+#ifdef CONFIG_SIGEXIT
+#define INIT_SIGEXIT(tsk) \
+ .notify = LIST_HEAD_INIT(tsk.notify), \
+ .monitor = LIST_HEAD_INIT(tsk.monitor),
+#else
+#define INIT_SIGEXIT(tsk)
+#endif
+
extern struct group_info init_groups;
#define INIT_STRUCT_PID { \
@@ -231,6 +239,7 @@ extern struct task_group root_task_group;
.alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \
.journal_info = NULL, \
.cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \
+ INIT_SIGEXIT(tsk) \
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \
.timer_slack_ns = 50000, /* 50 usec default slack */ \
INIT_TIMER_LIST \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 97ff026..0785453 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1686,6 +1686,12 @@ struct task_struct {
short il_next;
short pref_node_fork;
#endif
+#ifdef CONFIG_SIGEXIT
+ /* list of processes to notify on death */
+ struct list_head notify;
+ /* list of outstanding monitor requests */
+ struct list_head monitor;
+#endif
#ifdef CONFIG_NUMA_BALANCING
int numa_scan_seq;
unsigned int numa_scan_period;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a817b5c..2cf7776 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -55,6 +55,24 @@
#define PR_SET_NAME 15 /* Set process name */
#define PR_GET_NAME 16 /* Get process name */
+#ifdef CONFIG_SIGEXIT
+#define PR_DO_NOTIFY_TASK_STATE 17 /* Set/get notification for task
+ state changes */
+
+/* This is the data structure for requestion process death
+ * (and other state change) information. Sig of -1 means
+ * query, sig of 0 means deregistration, positive sig means
+ * that you want to set it. sig and events are value-result
+ * and will be updated with the previous values on every
+ * successful call.
+ */
+struct task_state_notify_info {
+ pid_t pid;
+ int sig;
+ unsigned int events;
+};
+#endif
+
/* Get/set process endian */
#define PR_GET_ENDIAN 19
#define PR_SET_ENDIAN 20
diff --git a/init/Kconfig b/init/Kconfig
index 1d645a1..37e48c0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1581,6 +1581,21 @@ config VM_EVENT_COUNTERS
on EXPERT systems. /proc/vmstat will only show page counts
if VM event counters are disabled.
+config SIGEXIT
+ bool "Notification of death of arbitrary processes"
+ default n
+ help
+ When enabled this exposes a new feature which may be called to request
+ notification when an arbitrary process changes state. The caller specifies
+ a pid, signal number, and event mask, and when that pid dies, or is
+ stopped, or anything else that would normally cause a SIGCHLD, the
+ kernel will send the specified signal to the caller if the event is in
+ the event mask originally passed down. The siginfo_t struct will
+ contain the same information as would be included with SIGCHLD.
+
+ This is exposed to userspace via the prctl()
+ call with the PR_DO_NOTIFY_TASK_STATE option
+
config SLUB_DEBUG
default y
bool "Enable SLUB debugging support" if EXPERT
diff --git a/kernel/Makefile b/kernel/Makefile
index 762218c..d357e7d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -120,6 +120,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SIGEXIT) += death_notify.o
obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/kernel/death_notify.c b/kernel/death_notify.c
new file mode 100644
index 0000000..5eb8bfc
--- /dev/null
+++ b/kernel/death_notify.c
@@ -0,0 +1,227 @@
+/*
+ * kernel/death_notify.c, Process death notification support
+ *
+ * Copyright (c) 2006-2014 Wind River Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/prctl.h>
+#include <linux/uaccess.h>
+
+#include "death_notify.h"
+
+static void unlink_status_notifier(struct signotifier *n)
+{
+ list_del(&n->monitor_list);
+ list_del(&n->notify_list);
+ kfree(n);
+}
+
+static void handle_already_monitoring(struct signotifier *node,
+ struct task_state_notify_info *args,
+ struct task_state_notify_info *oldargs)
+{
+ /* Store the old values */
+ oldargs->sig = node->sig;
+ oldargs->events = node->events;
+
+ /* We know that args->sig is 0 or a valid signal. */
+ if (args->sig > 0) {
+ /* Update the new values */
+ node->sig = args->sig;
+ node->events = args->events;
+ } else if (!args->sig) {
+ /* args->sig of 0 means to deregister */
+ unlink_status_notifier(node);
+ }
+}
+
+static void setup_new_node(struct task_struct *p,
+ struct signotifier *node,
+ struct task_state_notify_info *args)
+{
+ node->notify_tsk = current;
+ node->sig = args->sig;
+ node->events = args->events;
+
+ /* Add this node to the list of notification requests
+ * for the specified process.
+ */
+ list_add_tail(&node->notify_list, &p->notify);
+
+ /* Also add this node to the list of monitor requests
+ * for the current process.
+ */
+ list_add_tail(&node->monitor_list, &current->monitor);
+}
+
+
+/* Returns 0 if arguments are valid, 1 if they are not. */
+static int invalid_args(struct task_state_notify_info *args)
+{
+ int ret = 1;
+
+ if (args->pid <= 0)
+ goto out;
+
+ /* Sig of -1 implies query, sig of 0 implies deregistration.
+ * Otherwise sig must be positive and within range.
+ */
+ if ((args->sig < -1) || (args->sig > _NSIG))
+ goto out;
+
+ /* If positive sig, must have valid events. */
+ if (args->sig > 0) {
+ if (!args->events || (args->events >= (1 << (NSIGCHLD+1))))
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Notify those registered for process state updates via do_notify_task_state().
+ * If "del" is nonzero, the process is dying and we want to free
+ * the nodes in the list as we go.
+ *
+ * Note: we only notify processes for events in which they have registered
+ * interest.
+ *
+ * Must be called holding a lock on tasklist_lock.
+ */
+void do_notify_others(struct task_struct *tsk, struct siginfo *info)
+{
+ struct signotifier *node;
+ unsigned int events;
+
+ /* This method of generating the event bit must be
+ * matched in the userspace library.
+ */
+ events = 1 << (info->si_code & 0xFF);
+
+ list_for_each_entry(node, &tsk->notify, notify_list) {
+ if (events & node->events) {
+ info->si_signo = node->sig;
+ group_send_sig_info(node->sig, info, node->notify_tsk);
+ }
+ }
+}
+
+void release_notify_others(struct task_struct *p)
+{
+ struct signotifier *n, *t;
+
+ /* Need to clean up any outstanding requests where we
+ * wanted to be notified when others died.
+ */
+ list_for_each_entry_safe(n, t, &p->monitor, monitor_list) {
+ unlink_status_notifier(n);
+ }
+
+ /* Also need to clean up any outstanding requests where others
+ * wanted to be notified when we died.
+ */
+ list_for_each_entry_safe(n, t, &p->notify, notify_list) {
+ unlink_status_notifier(n);
+ }
+}
+
+/* If the config is defined, then processes can call this routine
+ * to request notification when the specified task's state changes.
+ * On the death (or other state change) of the specified process,
+ * we will send them the specified signal if the event is listed
+ * in their event bitfield.
+ *
+ * A sig of 0 means that we want to deregister.
+ *
+ * The sig/events fields are value/result. On success we update them
+ * to reflect what they were before the call.
+ *
+ * Returns error code on error, on success we return 0.
+ */
+int do_notify_task_state(unsigned long arg)
+{
+ int err;
+ struct task_struct *p;
+ struct signotifier *node, *tmp;
+ struct task_state_notify_info args, oldargs;
+
+ if (copy_from_user(&args, (struct task_state_notify_info __user *)arg,
+ sizeof(args)))
+ return -EFAULT;
+ oldargs.pid = args.pid;
+
+ /* Validate the arguments passed in. */
+ err = -EINVAL;
+ if (invalid_args(&args))
+ goto out;
+
+ /* We must hold a write lock on tasklist_lock to add the notification
+ * later on, and we need some lock on tasklist_lock for
+ * find_task_by_pid(), so may as well take the write lock now.
+ * Must use write_lock_irq().
+ */
+ write_lock_irq(&tasklist_lock);
+
+ err = -ESRCH;
+ p = find_task_by_vpid(args.pid);
+ if (!p)
+ goto unlock_out;
+
+ /* Now we know pid exists, unlikely to fail. */
+ err = 0;
+
+ /* Check if we're already monitoring the specified pid. If so, update
+ * the monitoring parameters and return the old ones.
+ */
+ list_for_each_entry(tmp, &p->notify, notify_list) {
+ if (tmp->notify_tsk == current) {
+ handle_already_monitoring(tmp, &args, &oldargs);
+ goto unlock_out;
+ }
+ }
+
+ /* If we get here, we're not currently monitoring the process. */
+ oldargs.sig = 0;
+ oldargs.events = 0;
+
+ /* If we wanted to set up a new monitor, do it now. If we didn't
+ * manage to allocate memory for the new node, then we return
+ * an appropriate error.
+ */
+ if (args.sig > 0) {
+ node = kmalloc(sizeof(*node), GFP_ATOMIC);
+ if (node)
+ setup_new_node(p, node, &args);
+ else
+ err = -ENOMEM;
+ }
+
+unlock_out:
+ write_unlock_irq(&tasklist_lock);
+
+ /* Copy the old values back to caller. */
+ if (copy_to_user((struct task_state_notify_info __user *)arg,
+ &oldargs, sizeof(oldargs)))
+ err = -EFAULT;
+
+out:
+ return err;
+}
diff --git a/kernel/death_notify.h b/kernel/death_notify.h
new file mode 100644
index 0000000..b2b8e8c
--- /dev/null
+++ b/kernel/death_notify.h
@@ -0,0 +1,45 @@
+/*
+ * kernel/death_notify.h, Process death notification support
+ *
+ * Copyright (c) 2006-2014 Wind River Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+#ifndef _KERNEL_DEATH_NOTIFY_H
+#define _KERNEL_DEATH_NOTIFY_H
+
+#ifdef CONFIG_SIGEXIT
+
+struct signotifier {
+ struct task_struct *notify_tsk;
+ struct list_head notify_list;
+ struct list_head monitor_list;
+ int sig;
+ unsigned int events;
+};
+
+extern int do_notify_task_state(unsigned long arg);
+extern void do_notify_others(struct task_struct *tsk,
+ struct siginfo *info);
+extern void release_notify_others(struct task_struct *p);
+
+#else /* !CONFIG_SIGEXIT */
+
+static inline void do_notify_others(struct task_struct *tsk,
+ struct siginfo *info) {}
+static inline void release_notify_others(struct task_struct *p) {}
+
+#endif /* CONFIG_SIGEXIT */
+#endif
diff --git a/kernel/exit.c b/kernel/exit.c
index 8a908ea..448a3c3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -59,6 +59,9 @@
#include <asm/unistd.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
+#ifdef CONFIG_SIGEXIT
+#include "death_notify.h"
+#endif
static void exit_mm(struct task_struct * tsk);
@@ -184,6 +187,9 @@ repeat:
proc_flush_task(p);
tasklist_write_lock_irq();
+#ifdef CONFIG_SIGEXIT
+ release_notify_others(p);
+#endif
ptrace_release_task(p);
__exit_signal(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 6bda4c0..f3cd3ad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1518,6 +1518,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->sequential_io = 0;
p->sequential_io_avg = 0;
#endif
+#ifdef CONFIG_SIGEXIT
+ INIT_LIST_HEAD(&p->notify);
+ INIT_LIST_HEAD(&p->monitor);
+#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);
diff --git a/kernel/signal.c b/kernel/signal.c
index 59e84a2..728daa9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -47,6 +47,9 @@
#include <asm/siginfo.h>
#include <asm/cacheflush.h>
#include "audit.h" /* audit_signal_info() */
+#ifdef CONFIG_SIGEXIT
+#include "death_notify.h"
+#endif
/*
* SLAB caches for signal bits.
@@ -1849,6 +1852,10 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
__wake_up_parent(tsk, tsk->parent);
spin_unlock_irqrestore(&psig->siglock, flags);
+#ifdef CONFIG_SIGEXIT
+ do_notify_others(tsk, &info);
+#endif
+
return autoreap;
}
@@ -1920,6 +1927,10 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
*/
__wake_up_parent(tsk, parent);
spin_unlock_irqrestore(&sighand->siglock, flags);
+
+#ifdef CONFIG_SIGEXIT
+ do_notify_others(tsk, &info);
+#endif
}
static inline int may_ptrace_stop(void)
diff --git a/kernel/sys.c b/kernel/sys.c
index e79ea7e..5d43b93 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -67,6 +67,10 @@
/* Hardening for Spectre-v1 */
#include <linux/nospec.h>
+#ifdef CONFIG_SIGEXIT
+#include "death_notify.h"
+#endif
+
#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a,b) (-EINVAL)
#endif
@@ -2478,6 +2482,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else
error = PR_MCE_KILL_DEFAULT;
break;
+#ifdef CONFIG_SIGEXIT
+ case PR_DO_NOTIFY_TASK_STATE:
+ error = do_notify_task_state(arg2);
+ break;
+#endif
case PR_SET_MM:
error = prctl_set_mm(arg2, arg3, arg4, arg5);
break;
--
2.7.4