summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Joachim Nilsson <troglobit@gmail.com> 2012-11-02 14:40:46 (GMT)
committer Joachim Nilsson <troglobit@gmail.com> 2012-11-02 14:40:48 (GMT)
commit ebb52b986515f0c3297de0449e2e75327321d1be (patch)
tree 76a1aaffa2d7de6443191e8c4ec2a2f49181f743
parent d9e21796790d2b358160b655fac6770b0b21af57 (diff)
Support for external process supervisor (1.2)
This commit adds support for interfacing with an external process supervisor. This is common in more safety-critical applications where liveness of processes' main loop must be verified.

No extra command line option is needed to activate support for an external kick. Simply use SIGUSR1 to assume control of the wdt and continuously kick it from another process. Use SIGUSR2 to signal graceful exit, in which case watchdogd will resume using its built-in kick.

If SIGUSR2 is not used when the supervisor exits, or if SIGUSR1 is not continuously received, the system will reboot as expected when the HW wdt timeout is reached.

Signed-off-by: Joachim Nilsson <troglobit@gmail.com>
-rw-r--r-- Makefile 4
-rw-r--r-- pidfile.c 105
-rw-r--r-- watchdogd.c 104
3 files changed, 186 insertions, 27 deletions
diff --git a/Makefile b/Makefile
index ac6eb51..f6652f4 100644
--- a/Makefile
+++ b/Makefile
@@ -19,9 +19,9 @@
# VERSION ?= $(shell git tag -l | tail -1)
VERSION ?= 1.2
EXEC = watchdogd
-OBJS = watchdogd.o daemonize.o
+OBJS = watchdogd.o daemonize.o pidfile.o
CFLAGS += -W -Wall -Werror
-CPPFLAGS += -DVERSION=\"$(VERSION)\"
+CPPFLAGS += -D_GNU_SOURCE -DVERSION=\"$(VERSION)\"
all: $(EXEC)
diff --git a/pidfile.c b/pidfile.c
new file mode 100644
index 0000000..628a472
--- /dev/null
+++ b/pidfile.c
@@ -0,0 +1,105 @@
+/* $OpenBSD: pidfile.c,v 1.8 2008/06/26 05:42:05 ray Exp $ */
+/* $NetBSD: pidfile.c,v 1.4 2001/02/19 22:43:42 cgd Exp $ */
+
+/*-
+ * Copyright (c) 1999 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static char *pidfile_path;
+static pid_t pidfile_pid;
+
+static void pidfile_cleanup(void);
+
+extern char *__progname;
+
+int
+pidfile(const char *basename)
+{
+ int save_errno, result;
+ pid_t pid;
+ FILE *f;
+
+ if (basename == NULL)
+ basename = __progname;
+
+ if (pidfile_path != NULL) {
+ free(pidfile_path);
+ pidfile_path = NULL;
+ }
+
+ /* _PATH_VARRUN includes trailing / */
+ result = asprintf(&pidfile_path, "%s%s.pid", _PATH_VARRUN, basename);
+ if (result == -1 || pidfile_path == NULL)
+ return (-1);
+
+ if ((f = fopen(pidfile_path, "w")) == NULL) {
+ save_errno = errno;
+ free(pidfile_path);
+ pidfile_path = NULL;
+ errno = save_errno;
+ return (-1);
+ }
+
+ pid = getpid();
+ if (fprintf(f, "%ld\n", (long)pid) <= 0 || fclose(f) != 0) {
+ save_errno = errno;
+ (void) unlink(pidfile_path);
+ free(pidfile_path);
+ pidfile_path = NULL;
+ errno = save_errno;
+ return (-1);
+ }
+
+ pidfile_pid = pid;
+ if (atexit(pidfile_cleanup) < 0) {
+ save_errno = errno;
+ (void) unlink(pidfile_path);
+ free(pidfile_path);
+ pidfile_path = NULL;
+ pidfile_pid = 0;
+ errno = save_errno;
+ return (-1);
+ }
+
+ return (0);
+}
+
+static void
+pidfile_cleanup(void)
+{
+
+ if (pidfile_path != NULL && pidfile_pid == getpid())
+ (void) unlink(pidfile_path);
+}
diff --git a/watchdogd.c b/watchdogd.c
index 0d10ad5..ce52f97 100644
--- a/watchdogd.c
+++ b/watchdogd.c
@@ -35,17 +35,21 @@
#define WDT_KICK_DEFAULT (WDT_TIMEOUT_DEFAULT / 2)
#define UNUSED(arg) arg __attribute__((unused))
-#define print(prio, fmt, args...) (sys_log ? syslog(prio, fmt, ##args) : fprintf(stderr, fmt, ##args))
-#define ERROR(fmt, args...) print(LOG_DAEMON | LOG_ERR, "%s: " fmt, __progname, ##args)
-#define PERROR(fmt, args...) print(LOG_DAEMON | LOG_ERR, "%s: " fmt ": %s\n", __progname, ##args, strerror(errno))
-#define DEBUG(fmt, args...) do { if (verbose) print(LOG_DAEMON | LOG_DEBUG, "%s: " fmt, __progname, ##args); } while(0)
+#define print(prio, fmt, args...) (sys_log ? syslog(prio, fmt, ##args) \
+ : fprintf(stderr, "%s: " fmt "\n", __progname, ##args))
+#define ERROR(fmt, args...) print(LOG_DAEMON | LOG_ERR, fmt, ##args)
+#define PERROR(fmt, args...) print(LOG_DAEMON | LOG_ERR, fmt ": %s", ##args, strerror(errno))
+#define DEBUG(fmt, args...) do { if (verbose) print(LOG_DAEMON | LOG_DEBUG, fmt, ##args); } while(0)
+#define INFO(fmt, args...) print(LOG_DAEMON | LOG_INFO, fmt, ##args)
int fd = -1;
int verbose = 0;
int sys_log = 0;
+int extkick = 0;
extern char *__progname;
int daemonize(char *output);
+int pidfile(const char *basename);
/*
@@ -53,11 +57,11 @@ int daemonize(char *output);
* the PC Watchdog card to reset its internal timer so it doesn't trigger
* a computer reset.
*/
-static void wdt_kick(void)
+static void wdt_kick(char *msg)
{
int dummy;
- DEBUG("Kicking watchdog.\n");
+ DEBUG("%s", msg ?: "Kicking watchdog.");
ioctl(fd, WDIOC_KEEPALIVE, &dummy);
}
@@ -65,11 +69,11 @@ static void wdt_set_timeout(int count)
{
int arg = count;
- DEBUG("Setting watchdog timeout to %d sec.\n", count);
+ DEBUG("Setting watchdog timeout to %d sec.", count);
if (ioctl(fd, WDIOC_SETTIMEOUT, &arg))
PERROR("Failed setting HW watchdog timeout");
else
- DEBUG("Previous timeout was %d sec\n", arg);
+ DEBUG("Previous timeout was %d sec", arg);
}
static int wdt_get_timeout(void)
@@ -80,7 +84,7 @@ static int wdt_get_timeout(void)
if ((err = ioctl(fd, WDIOC_GETTIMEOUT, &count)))
count = err;
- DEBUG("Watchdog timeout is set to %d sec.\n", count);
+ DEBUG("Watchdog timeout is set to %d sec.", count);
return count;
}
@@ -88,22 +92,49 @@ static int wdt_get_timeout(void)
static void wdt_magic_close(int UNUSED(signo))
{
if (fd != -1) {
- DEBUG("Safe exit, disabling HW watchdog.\n");
+ DEBUG("Safe exit, disabling HW watchdog.");
write(fd, "V", 1);
close(fd);
}
exit(0);
}
-static void setup_magic_close(void)
+static void wdt_external_kick(int UNUSED(signo))
+{
+ if (!extkick)
+ INFO("External supervisor now controls watchdog kick via SIGUSR1.");
+
+ extkick = 1;
+ wdt_kick("External watchdog kick.");
+}
+
+static void wdt_external_kick_exit(int UNUSED(signo))
+{
+ INFO("External supervisor requested safe exit. Reverting to built-in kick.");
+ extkick = 0;
+}
+
+static void setup_signals(int magic)
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
- sa.sa_handler = wdt_magic_close;
sigemptyset(&sa.sa_mask);
- sigaction(SIGINT, &sa, NULL);
- sigaction(SIGTERM, &sa, NULL);
+
+ /* When --safe-exit is selected */
+ if (magic) {
+ sa.sa_handler = wdt_magic_close;
+ sigaction(SIGINT, &sa, NULL);
+ sigaction(SIGTERM, &sa, NULL);
+ }
+
+ /* Kick from external process supervisor */
+ sa.sa_handler = wdt_external_kick;
+ sigaction(SIGUSR1, &sa, NULL);
+
+ /* Handle graceful exit by external supervisor */
+ sa.sa_handler = wdt_external_kick_exit;
+ sigaction(SIGUSR2, &sa, NULL);
}
static int usage(int status)
@@ -113,6 +144,7 @@ static int usage(int status)
"Options:\n"
" --foreground, -f Start in foreground (background is default)\n"
" --logfile, -l <file> Log to <file> when backgrounding, otherwise silent\n"
+ " --syslog, -L Use syslog, even if in foreground\n"
" --timeout, -w <sec> Set the HW watchdog timeout to <sec> seconds\n"
" --interval, -k <sec> Set watchdog kick interval to <sec> seconds\n"
" --safe-exit, -s Disable watchdog on exit from SIGINT/SIGTERM\n"
@@ -130,11 +162,13 @@ int main(int argc, char *argv[])
int real_timeout = 0;
int period = -1;
int background = 1;
+ int magic = 0;
int c;
char *logfile = NULL;
struct option long_options[] = {
{"foreground", 0, 0, 'f'},
{"logfile", 1, 0, 'l'},
+ {"syslog", 0, 0, 'L'},
{"timeout", 1, 0, 'w'},
{"interval", 1, 0, 'k'},
{"safe-exit", 0, 0, 's'},
@@ -144,7 +178,7 @@ int main(int argc, char *argv[])
{NULL, 0, 0, 0}
};
- while ((c = getopt_long(argc, argv, "fl:w:k:sVvh?", long_options, NULL)) != EOF) {
+ while ((c = getopt_long(argc, argv, "fl:Lw:k:sVvh?", long_options, NULL)) != EOF) {
switch (c) {
case 'f': /* Run in foreground */
background = 0;
@@ -152,12 +186,16 @@ int main(int argc, char *argv[])
case 'l': /* Log to file */
if (!optarg) {
- ERROR("Missing logfile argument.\n");
+ ERROR("Missing logfile argument.");
return usage(1);
}
logfile = strdup(optarg);
break;
+ case 'L': /* Force use of syslog, regardless */
+ sys_log = 1;
+ break;
+
case 'w': /* Watchdog timeout */
timeout = atoi(optarg);
break;
@@ -167,7 +205,7 @@ int main(int argc, char *argv[])
break;
case 's': /* Safe exit, i.e., don't reboot if we exit and close device */
- setup_magic_close();
+ magic = 1;
break;
case 'v':
@@ -197,15 +235,21 @@ int main(int argc, char *argv[])
/* Exit on parent or error. */
pid = daemonize(logfile);
if (pid)
- exit(pid < 0 ? 1 : 0);
-
- DEBUG("Starting in deamon mode.\n");
+ return pid < 0 ? 1 : 0;
}
+ INFO("Userspace watchdog daemon v%s starting ...", VERSION);
+
+ /* Setup callbacks for SIGUSR1 and, optionally, exit magic on SIGINT/SIGTERM */
+ setup_signals(magic);
+
+ if (pidfile(NULL))
+ PERROR("Cannot create pidfile");
+
fd = open(WDT_DEVNODE, O_WRONLY);
if (fd == -1) {
PERROR("Failed opening watchdog device, %s", WDT_DEVNODE);
- exit(1);
+ return 1;
}
wdt_set_timeout(timeout);
@@ -215,7 +259,7 @@ int main(int argc, char *argv[])
PERROR("Failed reading current watchdog timeout");
} else {
if (real_timeout <= period) {
- ERROR("Warning, watchdog timeout <= kick interval: %d <= %d\n",
+ ERROR("Warning, watchdog timeout <= kick interval: %d <= %d",
real_timeout, period);
}
}
@@ -227,11 +271,21 @@ int main(int argc, char *argv[])
else
period = real_timeout / 2;
}
- DEBUG("Watchdog kick interval set to %d sec.\n", period);
+ DEBUG("Watchdog kick interval set to %d sec.", period);
+ /* When an external supervisor once has started sending SIGUSR1
+ * it fully assumes responsibility for kicking. No magic here. */
while (1) {
- wdt_kick();
- sleep(period);
+ int rem;
+
+ if (!extkick)
+ wdt_kick(NULL);
+
+ /* Check if awaken by signal */
+ rem = period;
+ do {
+ rem = sleep(rem);
+ } while (rem > 0);
}
}