9fdf04
From ecf53c23545092019602578583031c28fde4d2a1 Mon Sep 17 00:00:00 2001
9fdf04
From: Giuseppe Scrivano <gscrivan@redhat.com>
9fdf04
Date: Fri, 25 May 2018 18:04:06 +0200
9fdf04
Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create
9fdf04
9fdf04
if NOTIFY_SOCKET is used, do not block the main runc process waiting
9fdf04
for events on the notify socket.  Change the logic to create a new
9fdf04
process that monitors exclusively the notify socket until an event is
9fdf04
received.
9fdf04
9fdf04
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
9fdf04
---
9fdf04
 init.go          |  12 +++++++
9fdf04
 notify_socket.go | 101 ++++++++++++++++++++++++++++++++++++++++++++++---------
9fdf04
 signals.go       |   5 +--
9fdf04
 3 files changed, 99 insertions(+), 19 deletions(-)
9fdf04
9fdf04
diff --git a/init.go b/init.go
9fdf04
index c8f453192..6a3d9e91c 100644
9fdf04
--- a/init.go
9fdf04
+++ b/init.go
9fdf04
@@ -20,6 +20,18 @@ var initCommand = cli.Command{
9fdf04
 	Name:  "init",
9fdf04
 	Usage: `initialize the namespaces and launch the process (do not call it outside of runc)`,
9fdf04
 	Action: func(context *cli.Context) error {
9fdf04
+		// If NOTIFY_SOCKET is used, create a new process that stays around
9fdf04
+		// so as not to block "runc start".  It will automatically exit when the
9fdf04
+		// container notifies that it is ready, or when the container is deleted.
9fdf04
+		if os.Getenv("_NOTIFY_SOCKET_FD") != "" {
9fdf04
+			fd := os.Getenv("_NOTIFY_SOCKET_FD")
9fdf04
+			pid := os.Getenv("_NOTIFY_SOCKET_PID")
9fdf04
+			hostNotifySocket := os.Getenv("_NOTIFY_SOCKET_HOST")
9fdf04
+			notifySocketPath := os.Getenv("_NOTIFY_SOCKET_PATH")
9fdf04
+			notifySocketInit(fd, pid, hostNotifySocket, notifySocketPath)
9fdf04
+			os.Exit(0)
9fdf04
+		}
9fdf04
+
9fdf04
 		factory, _ := libcontainer.New("")
9fdf04
 		if err := factory.StartInitialization(); err != nil {
9fdf04
 			// as the error is sent back to the parent there is no need to log
9fdf04
diff --git a/notify_socket.go b/notify_socket.go
9fdf04
index cd6c0a989..e04e9d660 100644
9fdf04
--- a/notify_socket.go
9fdf04
+++ b/notify_socket.go
9fdf04
@@ -6,10 +6,13 @@ import (
9fdf04
 	"bytes"
9fdf04
 	"fmt"
9fdf04
 	"net"
9fdf04
+	"os"
9fdf04
+	"os/exec"
9fdf04
 	"path/filepath"
9fdf04
+	"strconv"
9fdf04
+	"time"
9fdf04
 
9fdf04
 	"github.com/opencontainers/runtime-spec/specs-go"
9fdf04
-
9fdf04
 	"github.com/sirupsen/logrus"
9fdf04
 	"github.com/urfave/cli"
9fdf04
 )
9fdf04
@@ -64,24 +67,94 @@ func (s *notifySocket) setupSocket() error {
9fdf04
 	return nil
9fdf04
 }
9fdf04
 
9fdf04
+func (notifySocket *notifySocket) notifyNewPid(pid int) {
9fdf04
+	notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
9fdf04
+	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
9fdf04
+	if err != nil {
9fdf04
+		return
9fdf04
+	}
9fdf04
+	newPid := fmt.Sprintf("MAINPID=%d\n", pid)
9fdf04
+	client.Write([]byte(newPid))
9fdf04
+}
9fdf04
+
9fdf04
 // pid1 must be set only with -d, as it is used to set the new process as the main process
9fdf04
 // for the service in systemd
9fdf04
 func (notifySocket *notifySocket) run(pid1 int) {
9fdf04
-	buf := make([]byte, 512)
9fdf04
-	notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
9fdf04
-	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
9fdf04
+	file, err := notifySocket.socket.File()
9fdf04
 	if err != nil {
9fdf04
 		logrus.Error(err)
9fdf04
 		return
9fdf04
 	}
9fdf04
-	for {
9fdf04
-		r, err := notifySocket.socket.Read(buf)
9fdf04
-		if err != nil {
9fdf04
-			break
9fdf04
+	defer file.Close()
9fdf04
+	defer notifySocket.socket.Close()
9fdf04
+
9fdf04
+	cmd := exec.Command("/proc/self/exe", "init")
9fdf04
+	cmd.ExtraFiles = []*os.File{file}
9fdf04
+	cmd.Env = append(cmd.Env, "_NOTIFY_SOCKET_FD=3",
9fdf04
+		fmt.Sprintf("_NOTIFY_SOCKET_PID=%d", pid1),
9fdf04
+		fmt.Sprintf("_NOTIFY_SOCKET_HOST=%s", notifySocket.host),
9fdf04
+		fmt.Sprintf("_NOTIFY_SOCKET_PATH=%s", notifySocket.socketPath))
9fdf04
+
9fdf04
+	if err := cmd.Start(); err != nil {
9fdf04
+		logrus.Fatal(err)
9fdf04
+	}
9fdf04
+	notifySocket.notifyNewPid(cmd.Process.Pid)
9fdf04
+	cmd.Process.Release()
9fdf04
+}
9fdf04
+
9fdf04
+func notifySocketInit(envFd string, envPid string, notifySocketHost string, notifySocketPath string) {
9fdf04
+	intFd, err := strconv.Atoi(envFd)
9fdf04
+	if err != nil {
9fdf04
+		return
9fdf04
+	}
9fdf04
+	pid1, err := strconv.Atoi(envPid)
9fdf04
+	if err != nil {
9fdf04
+		return
9fdf04
+	}
9fdf04
+
9fdf04
+	file := os.NewFile(uintptr(intFd), "unixgram")
9fdf04
+	defer file.Close()
9fdf04
+
9fdf04
+	fileChan := make(chan []byte)
9fdf04
+	exitChan := make(chan bool)
9fdf04
+
9fdf04
+	go func() {
9fdf04
+		for {
9fdf04
+			buf := make([]byte, 512)
9fdf04
+			r, err := file.Read(buf)
9fdf04
+			if err != nil {
9fdf04
+				return
9fdf04
+			}
9fdf04
+			fileChan <- buf[0:r]
9fdf04
 		}
9fdf04
-		var out bytes.Buffer
9fdf04
-		for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
9fdf04
-			if bytes.HasPrefix(line, []byte("READY=")) {
9fdf04
+	}()
9fdf04
+	go func() {
9fdf04
+		for {
9fdf04
+			if _, err := os.Stat(notifySocketPath); os.IsNotExist(err) {
9fdf04
+				exitChan <- true
9fdf04
+				return
9fdf04
+			}
9fdf04
+			time.Sleep(time.Second)
9fdf04
+		}
9fdf04
+	}()
9fdf04
+
9fdf04
+	notifySocketHostAddr := net.UnixAddr{Name: notifySocketHost, Net: "unixgram"}
9fdf04
+	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
9fdf04
+	if err != nil {
9fdf04
+		return
9fdf04
+	}
9fdf04
+
9fdf04
+	for {
9fdf04
+		select {
9fdf04
+		case <-exitChan:
9fdf04
+			return
9fdf04
+		case b := <-fileChan:
9fdf04
+			for _, line := range bytes.Split(b, []byte{'\n'}) {
9fdf04
+				if !bytes.HasPrefix(line, []byte("READY=")) {
9fdf04
+					continue
9fdf04
+				}
9fdf04
+
9fdf04
+				var out bytes.Buffer
9fdf04
 				_, err = out.Write(line)
9fdf04
 				if err != nil {
9fdf04
 					return
9fdf04
@@ -98,10 +171,8 @@ func (notifySocket *notifySocket) run(pid1 int) {
9fdf04
 				}
9fdf04
 
9fdf04
 				// now we can inform systemd to use pid1 as the pid to monitor
9fdf04
-				if pid1 > 0 {
9fdf04
-					newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
9fdf04
-					client.Write([]byte(newPid))
9fdf04
-				}
9fdf04
+				newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
9fdf04
+				client.Write([]byte(newPid))
9fdf04
 				return
9fdf04
 			}
9fdf04
 		}
9fdf04
diff --git a/signals.go b/signals.go
9fdf04
index 1811de837..d0988cb39 100644
9fdf04
--- a/signals.go
9fdf04
+++ b/signals.go
9fdf04
@@ -70,7 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
9fdf04
 			h.notifySocket.run(pid1)
9fdf04
 			return 0, nil
9fdf04
 		} else {
9fdf04
-			go h.notifySocket.run(0)
9fdf04
+			h.notifySocket.run(os.Getpid())
9fdf04
 		}
9fdf04
 	}
9fdf04
 
9fdf04
@@ -98,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
9fdf04
 					// status because we must ensure that any of the go specific process
9fdf04
 					// fun such as flushing pipes are complete before we return.
9fdf04
 					process.Wait()
9fdf04
-					if h.notifySocket != nil {
9fdf04
-						h.notifySocket.Close()
9fdf04
-					}
9fdf04
 					return e.status, nil
9fdf04
 				}
9fdf04
 			}