9c0e26
From 3d99c51e1b38a440804a55c9f314f62cc50b8902 Mon Sep 17 00:00:00 2001
9c0e26
From: Giuseppe Scrivano <gscrivan@redhat.com>
9c0e26
Date: Fri, 25 May 2018 18:04:06 +0200
9c0e26
Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create
9c0e26
9c0e26
if NOTIFY_SOCKET is used, do not block the main runc process waiting
9c0e26
for events on the notify socket.  Bind mount the parent directory of
9c0e26
the notify socket, so that "start" can create the socket and it is
9c0e26
still accessible from the container.
9c0e26
9c0e26
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
9c0e26
---
9c0e26
 notify_socket.go | 112 ++++++++++++++++++++++++++++++++++-------------
9c0e26
 signals.go       |   4 +-
9c0e26
 start.go         |  13 +++++-
9c0e26
 utils_linux.go   |  12 ++++-
9c0e26
 4 files changed, 105 insertions(+), 36 deletions(-)
9c0e26
9c0e26
diff --git a/notify_socket.go b/notify_socket.go
9c0e26
index e7453c62..d961453a 100644
9c0e26
--- a/notify_socket.go
9c0e26
+++ b/notify_socket.go
9c0e26
@@ -7,11 +7,13 @@ import (
9c0e26
 	"fmt"
9c0e26
 	"net"
9c0e26
 	"os"
9c0e26
+	"path"
9c0e26
 	"path/filepath"
9c0e26
+	"strconv"
9c0e26
+	"time"
9c0e26
 
9c0e26
+	"github.com/opencontainers/runc/libcontainer"
9c0e26
 	"github.com/opencontainers/runtime-spec/specs-go"
9c0e26
-
9c0e26
-	"github.com/sirupsen/logrus"
9c0e26
 	"github.com/urfave/cli"
9c0e26
 )
9c0e26
 
9c0e26
@@ -27,12 +29,12 @@ func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *
9c0e26
 	}
9c0e26
 
9c0e26
 	root := filepath.Join(context.GlobalString("root"), id)
9c0e26
-	path := filepath.Join(root, "notify.sock")
9c0e26
+	socketPath := filepath.Join(root, "notify", "notify.sock")
9c0e26
 
9c0e26
 	notifySocket := &notifySocket{
9c0e26
 		socket:     nil,
9c0e26
 		host:       notifySocketHost,
9c0e26
-		socketPath: path,
9c0e26
+		socketPath: socketPath,
9c0e26
 	}
9c0e26
 
9c0e26
 	return notifySocket
9c0e26
@@ -44,13 +46,19 @@ func (s *notifySocket) Close() error {
9c0e26
 
9c0e26
 // If systemd is supporting sd_notify protocol, this function will add support
9c0e26
 // for sd_notify protocol from within the container.
9c0e26
-func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) {
9c0e26
-	mount := specs.Mount{Destination: s.host, Source: s.socketPath, Options: []string{"bind"}}
9c0e26
+func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) error {
9c0e26
+	pathInContainer := filepath.Join("/run/notify", path.Base(s.socketPath))
9c0e26
+	mount := specs.Mount{
9c0e26
+		Destination: path.Dir(pathInContainer),
9c0e26
+		Source:      path.Dir(s.socketPath),
9c0e26
+		Options:     []string{"bind", "nosuid", "noexec", "nodev", "ro"},
9c0e26
+	}
9c0e26
 	spec.Mounts = append(spec.Mounts, mount)
9c0e26
-	spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host))
9c0e26
+	spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", pathInContainer))
9c0e26
+	return nil
9c0e26
 }
9c0e26
 
9c0e26
-func (s *notifySocket) setupSocket() error {
9c0e26
+func (s *notifySocket) bindSocket() error {
9c0e26
 	addr := net.UnixAddr{
9c0e26
 		Name: s.socketPath,
9c0e26
 		Net:  "unixgram",
9c0e26
@@ -71,45 +79,89 @@ func (s *notifySocket) setupSocket() error {
9c0e26
 	return nil
9c0e26
 }
9c0e26
 
9c0e26
-// pid1 must be set only with -d, as it is used to set the new process as the main process
9c0e26
-// for the service in systemd
9c0e26
-func (s *notifySocket) run(pid1 int) {
9c0e26
-	buf := make([]byte, 512)
9c0e26
-	notifySocketHostAddr := net.UnixAddr{Name: s.host, Net: "unixgram"}
9c0e26
+func (s *notifySocket) setupSocketDirectory() error {
9c0e26
+	return os.Mkdir(path.Dir(s.socketPath), 0755)
9c0e26
+}
9c0e26
+
9c0e26
+func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*notifySocket, error) {
9c0e26
+	notifySocket := newNotifySocket(context, notifySocketHost, id)
9c0e26
+	if notifySocket == nil {
9c0e26
+		return nil, nil
9c0e26
+	}
9c0e26
+
9c0e26
+	if err := notifySocket.bindSocket(); err != nil {
9c0e26
+		return nil, err
9c0e26
+	}
9c0e26
+	return notifySocket, nil
9c0e26
+}
9c0e26
+
9c0e26
+func (n *notifySocket) waitForContainer(container libcontainer.Container) error {
9c0e26
+	s, err := container.State()
9c0e26
+	if err != nil {
9c0e26
+		return err
9c0e26
+	}
9c0e26
+	return n.run(s.InitProcessPid)
9c0e26
+}
9c0e26
+
9c0e26
+func (n *notifySocket) run(pid1 int) error {
9c0e26
+	if n.socket == nil {
9c0e26
+		return nil
9c0e26
+	}
9c0e26
+	notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"}
9c0e26
 	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
9c0e26
 	if err != nil {
9c0e26
-		logrus.Error(err)
9c0e26
-		return
9c0e26
+		return err
9c0e26
 	}
9c0e26
-	for {
9c0e26
-		r, err := s.socket.Read(buf)
9c0e26
-		if err != nil {
9c0e26
-			break
9c0e26
+
9c0e26
+	ticker := time.NewTicker(time.Millisecond * 100)
9c0e26
+	defer ticker.Stop()
9c0e26
+
9c0e26
+	fileChan := make(chan []byte)
9c0e26
+	go func() {
9c0e26
+		for {
9c0e26
+			buf := make([]byte, 512)
9c0e26
+			r, err := n.socket.Read(buf)
9c0e26
+			if err != nil {
9c0e26
+				return
9c0e26
+			}
9c0e26
+			got := buf[0:r]
9c0e26
+			if !bytes.HasPrefix(got, []byte("READY=")) {
9c0e26
+				continue
9c0e26
+			}
9c0e26
+			fileChan <- got
9c0e26
+			return
9c0e26
 		}
9c0e26
-		var out bytes.Buffer
9c0e26
-		for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
9c0e26
-			if bytes.HasPrefix(line, []byte("READY=")) {
9c0e26
+	}()
9c0e26
+
9c0e26
+	for {
9c0e26
+		select {
9c0e26
+		case <-ticker.C:
9c0e26
+			_, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1)))
9c0e26
+			if err != nil {
9c0e26
+				return nil
9c0e26
+			}
9c0e26
+		case b := <-fileChan:
9c0e26
+			for _, line := range bytes.Split(b, []byte{'\n'}) {
9c0e26
+				var out bytes.Buffer
9c0e26
 				_, err = out.Write(line)
9c0e26
 				if err != nil {
9c0e26
-					return
9c0e26
+					return err
9c0e26
 				}
9c0e26
 
9c0e26
 				_, err = out.Write([]byte{'\n'})
9c0e26
 				if err != nil {
9c0e26
-					return
9c0e26
+					return err
9c0e26
 				}
9c0e26
 
9c0e26
 				_, err = client.Write(out.Bytes())
9c0e26
 				if err != nil {
9c0e26
-					return
9c0e26
+					return err
9c0e26
 				}
9c0e26
 
9c0e26
 				// now we can inform systemd to use pid1 as the pid to monitor
9c0e26
-				if pid1 > 0 {
9c0e26
-					newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
9c0e26
-					client.Write([]byte(newPid))
9c0e26
-				}
9c0e26
-				return
9c0e26
+				newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
9c0e26
+				client.Write([]byte(newPid))
9c0e26
+				return nil
9c0e26
 			}
9c0e26
 		}
9c0e26
 	}
9c0e26
diff --git a/signals.go b/signals.go
9c0e26
index b67f65a0..dd25e094 100644
9c0e26
--- a/signals.go
9c0e26
+++ b/signals.go
9c0e26
@@ -70,6 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
9c0e26
 			h.notifySocket.run(pid1)
9c0e26
 			return 0, nil
9c0e26
 		}
9c0e26
+		h.notifySocket.run(os.Getpid())
9c0e26
 		go h.notifySocket.run(0)
9c0e26
 	}
9c0e26
 
9c0e26
@@ -97,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
9c0e26
 					// status because we must ensure that any of the go specific process
9c0e26
 					// fun such as flushing pipes are complete before we return.
9c0e26
 					process.Wait()
9c0e26
-					if h.notifySocket != nil {
9c0e26
-						h.notifySocket.Close()
9c0e26
-					}
9c0e26
 					return e.status, nil
9c0e26
 				}
9c0e26
 			}
9c0e26
diff --git a/start.go b/start.go
9c0e26
index 2bb698b2..3a1769a4 100644
9c0e26
--- a/start.go
9c0e26
+++ b/start.go
9c0e26
@@ -3,6 +3,7 @@ package main
9c0e26
 import (
9c0e26
 	"errors"
9c0e26
 	"fmt"
9c0e26
+	"os"
9c0e26
 
9c0e26
 	"github.com/opencontainers/runc/libcontainer"
9c0e26
 	"github.com/urfave/cli"
9c0e26
@@ -31,7 +32,17 @@ your host.`,
9c0e26
 		}
9c0e26
 		switch status {
9c0e26
 		case libcontainer.Created:
9c0e26
-			return container.Exec()
9c0e26
+			notifySocket, err := notifySocketStart(context, os.Getenv("NOTIFY_SOCKET"), container.ID())
9c0e26
+			if err != nil {
9c0e26
+				return err
9c0e26
+			}
9c0e26
+			if err := container.Exec(); err != nil {
9c0e26
+				return err
9c0e26
+			}
9c0e26
+			if notifySocket != nil {
9c0e26
+				return notifySocket.waitForContainer(container)
9c0e26
+			}
9c0e26
+			return nil
9c0e26
 		case libcontainer.Stopped:
9c0e26
 			return errors.New("cannot start a container that has stopped")
9c0e26
 		case libcontainer.Running:
9c0e26
diff --git a/utils_linux.go b/utils_linux.go
9c0e26
index 984e6b0f..46c26246 100644
9c0e26
--- a/utils_linux.go
9c0e26
+++ b/utils_linux.go
9c0e26
@@ -408,7 +408,9 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
9c0e26
 
9c0e26
 	notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
9c0e26
 	if notifySocket != nil {
9c0e26
-		notifySocket.setupSpec(context, spec)
9c0e26
+		if err := notifySocket.setupSpec(context, spec); err != nil {
9c0e26
+			return -1, err
9c0e26
+		}
9c0e26
 	}
9c0e26
 
9c0e26
 	container, err := createContainer(context, id, spec)
9c0e26
@@ -417,10 +419,16 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
9c0e26
 	}
9c0e26
 
9c0e26
 	if notifySocket != nil {
9c0e26
-		err := notifySocket.setupSocket()
9c0e26
+		err := notifySocket.setupSocketDirectory()
9c0e26
 		if err != nil {
9c0e26
 			return -1, err
9c0e26
 		}
9c0e26
+		if action == CT_ACT_RUN {
9c0e26
+			err := notifySocket.bindSocket()
9c0e26
+			if err != nil {
9c0e26
+				return -1, err
9c0e26
+			}
9c0e26
+		}
9c0e26
 	}
9c0e26
 
9c0e26
 	// Support on-demand socket activation by passing file descriptors into the container init process.
9c0e26
-- 
9c0e26
2.21.0
9c0e26