diff -up at-3.1.13/atd.c.aborted at-3.1.13/atd.c --- at-3.1.13/atd.c.aborted 2016-04-22 13:30:58.563029540 +0200 +++ at-3.1.13/atd.c 2017-09-14 16:00:38.109011916 +0200 @@ -74,6 +74,9 @@ #include #endif +#include +#include + /* Local headers */ #include "privs.h" @@ -285,7 +288,7 @@ run_file(const char *filename, uid_t uid * mail to the user. */ pid_t pid; - int fd_out, fd_in; + int fd_out, fd_in, fd_std; char jobbuf[9]; char *mailname = NULL; int mailsize = 128; @@ -404,6 +407,10 @@ run_file(const char *filename, uid_t uid fcntl(fd_in, F_SETFD, fflags & ~FD_CLOEXEC); + if (flock(fd_in, LOCK_EX | LOCK_NB) != 0) + perr("Somebody already locked the job %8lu (%.500s) - " + "aborting", jobno, filename); + /* * If the spool directory is mounted via NFS `atd' isn't able to * read from the job file and will bump out here. The file is @@ -563,10 +570,7 @@ run_file(const char *filename, uid_t uid PRIV_END } /* We're the parent. Let's wait. - */ - close(fd_in); - - /* We inherited the master's SIGCHLD handler, which does a + We inherited the master's SIGCHLD handler, which does a non-blocking waitpid. So this blocking one will eventually return with an ECHILD error. */ @@ -583,14 +587,14 @@ run_file(const char *filename, uid_t uid /* some sendmail implementations are confused if stdout, stderr are * not available, so let them point to /dev/null */ - if ((fd_in = open("/dev/null", O_WRONLY)) < 0) + if ((fd_std = open("/dev/null", O_WRONLY)) < 0) perr("Could not open /dev/null."); - if (dup2(fd_in, STDOUT_FILENO) < 0) + if (dup2(fd_std, STDOUT_FILENO) < 0) perr("Could not use /dev/null as standard output."); - if (dup2(fd_in, STDERR_FILENO) < 0) + if (dup2(fd_std, STDERR_FILENO) < 0) perr("Could not use /dev/null as standard error."); - if (fd_in != STDOUT_FILENO && fd_in != STDERR_FILENO) - close(fd_in); + if (fd_std != STDOUT_FILENO && fd_std != STDERR_FILENO) + close(fd_std); if (unlink(filename) == -1) syslog(LOG_WARNING, "Warning: removing output file for job %li failed: %s", @@ -598,7 +602,12 @@ run_file(const char *filename, uid_t uid /* The job is now finished. We can delete its input file. */ - chdir(ATJOB_DIR); + if (chdir(ATJOB_DIR) != 0) + perr("Somebody removed %s directory from under us.", ATJOB_DIR); + + /* This also removes the flock */ + (void)close(fd_in); + unlink(newname); free(newname); @@ -642,7 +651,7 @@ run_file(const char *filename, uid_t uid PRIV_END } else if ( mail_pid == -1 ) { - perr("fork of mailer failed"); + syslog(LOG_ERR, "fork of mailer failed: %m"); } else { /* Parent */ @@ -738,8 +747,16 @@ run_loop() /* Skip lock files */ if (queue == '=') { if ((buf.st_nlink == 1) && (run_time + CHECK_INTERVAL <= now)) { - /* Remove stale lockfile FIXME: lock the lockfile, if you fail, it's still in use. */ - unlink(dirent->d_name); + int fd; + + fd = open(dirent->d_name, O_RDONLY); + if (fd != -1) { + if (flock(fd, LOCK_EX | LOCK_NB) == 0) { + unlink(dirent->d_name); + syslog(LOG_NOTICE, "removing stale lock file %s\n", dirent->d_name); + } + (void)close(fd); + } } continue; } @@ -752,12 +769,17 @@ run_loop() /* Is the file already locked? */ if (buf.st_nlink > 1) { + if (run_time < buf.st_mtime) + run_time = buf.st_mtime; if (run_time + CHECK_INTERVAL <= now) { - /* Something went wrong the last time this was executed. * Let's remove the lockfile and reschedule. + * We also change the timestamp to avoid rerunning the job more + * than once every CHECK_INTERVAL. */ strncpy(lock_name, dirent->d_name, sizeof(lock_name)); + if (utime(lock_name, 0) < 0) + syslog(LOG_ERR, "utime couldn't be set for lock file %s\n", lock_name); lock_name[sizeof(lock_name)-1] = '\0'; lock_name[0] = '='; unlink(lock_name);