diff -up at-3.1.13/atd.c.aborted at-3.1.13/atd.c
--- at-3.1.13/atd.c.aborted 2016-04-22 13:30:58.563029540 +0200
+++ at-3.1.13/atd.c 2017-09-14 16:00:38.109011916 +0200
@@ -74,6 +74,9 @@
#include <syslog.h>
#endif
+#include <sys/file.h>
+#include <utime.h>
+
/* Local headers */
#include "privs.h"
@@ -285,7 +288,7 @@ run_file(const char *filename, uid_t uid
* mail to the user.
*/
pid_t pid;
- int fd_out, fd_in;
+ int fd_out, fd_in, fd_std;
char jobbuf[9];
char *mailname = NULL;
int mailsize = 128;
@@ -404,6 +407,10 @@ run_file(const char *filename, uid_t uid
fcntl(fd_in, F_SETFD, fflags & ~FD_CLOEXEC);
+ if (flock(fd_in, LOCK_EX | LOCK_NB) != 0)
+ perr("Somebody already locked the job %8lu (%.500s) - "
+ "aborting", jobno, filename);
+
/*
* If the spool directory is mounted via NFS `atd' isn't able to
* read from the job file and will bump out here. The file is
@@ -563,10 +570,7 @@ run_file(const char *filename, uid_t uid
PRIV_END
}
/* We're the parent. Let's wait.
- */
- close(fd_in);
-
- /* We inherited the master's SIGCHLD handler, which does a
+ We inherited the master's SIGCHLD handler, which does a
non-blocking waitpid. So this blocking one will eventually
return with an ECHILD error.
*/
@@ -583,14 +587,14 @@ run_file(const char *filename, uid_t uid
/* some sendmail implementations are confused if stdout, stderr are
* not available, so let them point to /dev/null
*/
- if ((fd_in = open("/dev/null", O_WRONLY)) < 0)
+ if ((fd_std = open("/dev/null", O_WRONLY)) < 0)
perr("Could not open /dev/null.");
- if (dup2(fd_in, STDOUT_FILENO) < 0)
+ if (dup2(fd_std, STDOUT_FILENO) < 0)
perr("Could not use /dev/null as standard output.");
- if (dup2(fd_in, STDERR_FILENO) < 0)
+ if (dup2(fd_std, STDERR_FILENO) < 0)
perr("Could not use /dev/null as standard error.");
- if (fd_in != STDOUT_FILENO && fd_in != STDERR_FILENO)
- close(fd_in);
+ if (fd_std != STDOUT_FILENO && fd_std != STDERR_FILENO)
+ close(fd_std);
if (unlink(filename) == -1)
syslog(LOG_WARNING, "Warning: removing output file for job %li failed: %s",
@@ -598,7 +602,12 @@ run_file(const char *filename, uid_t uid
/* The job is now finished. We can delete its input file.
*/
- chdir(ATJOB_DIR);
+ if (chdir(ATJOB_DIR) != 0)
+ perr("Somebody removed %s directory from under us.", ATJOB_DIR);
+
+ /* This also removes the flock */
+ (void)close(fd_in);
+
unlink(newname);
free(newname);
@@ -642,7 +651,7 @@ run_file(const char *filename, uid_t uid
PRIV_END
}
else if ( mail_pid == -1 ) {
- perr("fork of mailer failed");
+ syslog(LOG_ERR, "fork of mailer failed: %m");
}
else {
/* Parent */
@@ -738,8 +747,16 @@ run_loop()
/* Skip lock files */
if (queue == '=') {
if ((buf.st_nlink == 1) && (run_time + CHECK_INTERVAL <= now)) {
- /* Remove stale lockfile FIXME: lock the lockfile, if you fail, it's still in use. */
- unlink(dirent->d_name);
+ int fd;
+
+ fd = open(dirent->d_name, O_RDONLY);
+ if (fd != -1) {
+ if (flock(fd, LOCK_EX | LOCK_NB) == 0) {
+ unlink(dirent->d_name);
+ syslog(LOG_NOTICE, "removing stale lock file %s\n", dirent->d_name);
+ }
+ (void)close(fd);
+ }
}
continue;
}
@@ -752,12 +769,17 @@ run_loop()
/* Is the file already locked?
*/
if (buf.st_nlink > 1) {
+ if (run_time < buf.st_mtime)
+ run_time = buf.st_mtime;
if (run_time + CHECK_INTERVAL <= now) {
-
/* Something went wrong the last time this was executed.
* Let's remove the lockfile and reschedule.
+ * We also change the timestamp to avoid rerunning the job more
+ * than once every CHECK_INTERVAL.
*/
strncpy(lock_name, dirent->d_name, sizeof(lock_name));
+ if (utime(lock_name, 0) < 0)
+ syslog(LOG_ERR, "utime couldn't be set for lock file %s\n", lock_name);
lock_name[sizeof(lock_name)-1] = '\0';
lock_name[0] = '=';
unlink(lock_name);