4db318
From cd9b959b34c183cf6cd031af678c4ec66b765080 Mon Sep 17 00:00:00 2001
4db318
From: Giuseppe Scrivano <gscrivan@redhat.com>
4db318
Date: Fri, 25 May 2018 18:04:06 +0200
4db318
Subject: [PATCH] sd-notify: do not hang when NOTIFY_SOCKET is used with create
4db318
4db318
if NOTIFY_SOCKET is used, do not block the main runc process waiting
4db318
for events on the notify socket.  Bind mount the parent directory of
4db318
the notify socket, so that "start" can create the socket and it is
4db318
still accessible from the container.
4db318
4db318
Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
4db318
---
4db318
 notify_socket.go | 113 ++++++++++++++++++++++++++++++++++-------------
4db318
 signals.go       |   5 +--
4db318
 start.go         |  13 +++++-
4db318
 utils_linux.go   |  12 ++++-
4db318
 4 files changed, 106 insertions(+), 37 deletions(-)
4db318
4db318
diff --git a/notify_socket.go b/notify_socket.go
4db318
index cd6c0a98..7fbd2e73 100644
4db318
--- a/notify_socket.go
4db318
+++ b/notify_socket.go
4db318
@@ -6,11 +6,14 @@ import (
4db318
 	"bytes"
4db318
 	"fmt"
4db318
 	"net"
4db318
+	"os"
4db318
+	"path"
4db318
 	"path/filepath"
4db318
+	"strconv"
4db318
+	"time"
4db318
 
4db318
+	"github.com/opencontainers/runc/libcontainer"
4db318
 	"github.com/opencontainers/runtime-spec/specs-go"
4db318
-
4db318
-	"github.com/sirupsen/logrus"
4db318
 	"github.com/urfave/cli"
4db318
 )
4db318
 
4db318
@@ -26,12 +29,12 @@ func newNotifySocket(context *cli.Context, notifySocketHost string, id string) *
4db318
 	}
4db318
 
4db318
 	root := filepath.Join(context.GlobalString("root"), id)
4db318
-	path := filepath.Join(root, "notify.sock")
4db318
+	socketPath := filepath.Join(root, "notify", "notify.sock")
4db318
 
4db318
 	notifySocket := &notifySocket{
4db318
 		socket:     nil,
4db318
 		host:       notifySocketHost,
4db318
-		socketPath: path,
4db318
+		socketPath: socketPath,
4db318
 	}
4db318
 
4db318
 	return notifySocket
4db318
@@ -43,13 +46,19 @@ func (ns *notifySocket) Close() error {
4db318
 
4db318
 // If systemd is supporting sd_notify protocol, this function will add support
4db318
 // for sd_notify protocol from within the container.
4db318
-func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) {
4db318
-	mount := specs.Mount{Destination: s.host, Source: s.socketPath, Options: []string{"bind"}}
4db318
+func (s *notifySocket) setupSpec(context *cli.Context, spec *specs.Spec) error {
4db318
+	pathInContainer := filepath.Join("/run/notify", path.Base(s.socketPath))
4db318
+	mount := specs.Mount{
4db318
+		Destination: path.Dir(pathInContainer),
4db318
+		Source:      path.Dir(s.socketPath),
4db318
+		Options:     []string{"bind", "nosuid", "noexec", "nodev", "ro"},
4db318
+	}
4db318
 	spec.Mounts = append(spec.Mounts, mount)
4db318
-	spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", s.host))
4db318
+	spec.Process.Env = append(spec.Process.Env, fmt.Sprintf("NOTIFY_SOCKET=%s", pathInContainer))
4db318
+	return nil
4db318
 }
4db318
 
4db318
-func (s *notifySocket) setupSocket() error {
4db318
+func (s *notifySocket) bindSocket() error {
4db318
 	addr := net.UnixAddr{
4db318
 		Name: s.socketPath,
4db318
 		Net:  "unixgram",
4db318
@@ -64,45 +73,89 @@ func (s *notifySocket) setupSocket() error {
4db318
 	return nil
4db318
 }
4db318
 
4db318
-// pid1 must be set only with -d, as it is used to set the new process as the main process
4db318
-// for the service in systemd
4db318
-func (notifySocket *notifySocket) run(pid1 int) {
4db318
-	buf := make([]byte, 512)
4db318
-	notifySocketHostAddr := net.UnixAddr{Name: notifySocket.host, Net: "unixgram"}
4db318
+func (s *notifySocket) setupSocketDirectory() error {
4db318
+	return os.Mkdir(path.Dir(s.socketPath), 0755)
4db318
+}
4db318
+
4db318
+func notifySocketStart(context *cli.Context, notifySocketHost, id string) (*notifySocket, error) {
4db318
+	notifySocket := newNotifySocket(context, notifySocketHost, id)
4db318
+	if notifySocket == nil {
4db318
+		return nil, nil
4db318
+	}
4db318
+
4db318
+	if err := notifySocket.bindSocket(); err != nil {
4db318
+		return nil, err
4db318
+	}
4db318
+	return notifySocket, nil
4db318
+}
4db318
+
4db318
+func (n *notifySocket) waitForContainer(container libcontainer.Container) error {
4db318
+	s, err := container.State()
4db318
+	if err != nil {
4db318
+		return err
4db318
+	}
4db318
+	return n.run(s.InitProcessPid)
4db318
+}
4db318
+
4db318
+func (n *notifySocket) run(pid1 int) error {
4db318
+	if n.socket == nil {
4db318
+		return nil
4db318
+	}
4db318
+	notifySocketHostAddr := net.UnixAddr{Name: n.host, Net: "unixgram"}
4db318
 	client, err := net.DialUnix("unixgram", nil, &notifySocketHostAddr)
4db318
 	if err != nil {
4db318
-		logrus.Error(err)
4db318
-		return
4db318
+		return err
4db318
 	}
4db318
-	for {
4db318
-		r, err := notifySocket.socket.Read(buf)
4db318
-		if err != nil {
4db318
-			break
4db318
+
4db318
+	ticker := time.NewTicker(time.Millisecond * 100)
4db318
+	defer ticker.Stop()
4db318
+
4db318
+	fileChan := make(chan []byte)
4db318
+	go func() {
4db318
+		for {
4db318
+			buf := make([]byte, 512)
4db318
+			r, err := n.socket.Read(buf)
4db318
+			if err != nil {
4db318
+				return
4db318
+			}
4db318
+			got := buf[0:r]
4db318
+			if !bytes.HasPrefix(got, []byte("READY=")) {
4db318
+				continue
4db318
+			}
4db318
+			fileChan <- got
4db318
+			return
4db318
 		}
4db318
-		var out bytes.Buffer
4db318
-		for _, line := range bytes.Split(buf[0:r], []byte{'\n'}) {
4db318
-			if bytes.HasPrefix(line, []byte("READY=")) {
4db318
+	}()
4db318
+
4db318
+	for {
4db318
+		select {
4db318
+		case <-ticker.C:
4db318
+			_, err := os.Stat(filepath.Join("/proc", strconv.Itoa(pid1)))
4db318
+			if err != nil {
4db318
+				return nil
4db318
+			}
4db318
+		case b := <-fileChan:
4db318
+			for _, line := range bytes.Split(b, []byte{'\n'}) {
4db318
+				var out bytes.Buffer
4db318
 				_, err = out.Write(line)
4db318
 				if err != nil {
4db318
-					return
4db318
+					return err
4db318
 				}
4db318
 
4db318
 				_, err = out.Write([]byte{'\n'})
4db318
 				if err != nil {
4db318
-					return
4db318
+					return err
4db318
 				}
4db318
 
4db318
 				_, err = client.Write(out.Bytes())
4db318
 				if err != nil {
4db318
-					return
4db318
+					return err
4db318
 				}
4db318
 
4db318
 				// now we can inform systemd to use pid1 as the pid to monitor
4db318
-				if pid1 > 0 {
4db318
-					newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
4db318
-					client.Write([]byte(newPid))
4db318
-				}
4db318
-				return
4db318
+				newPid := fmt.Sprintf("MAINPID=%d\n", pid1)
4db318
+				client.Write([]byte(newPid))
4db318
+				return nil
4db318
 			}
4db318
 		}
4db318
 	}
4db318
diff --git a/signals.go b/signals.go
4db318
index 1811de83..d0988cb3 100644
4db318
--- a/signals.go
4db318
+++ b/signals.go
4db318
@@ -70,7 +70,7 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
4db318
 			h.notifySocket.run(pid1)
4db318
 			return 0, nil
4db318
 		} else {
4db318
-			go h.notifySocket.run(0)
4db318
+			h.notifySocket.run(os.Getpid())
4db318
 		}
4db318
 	}
4db318
 
4db318
@@ -98,9 +98,6 @@ func (h *signalHandler) forward(process *libcontainer.Process, tty *tty, detach
4db318
 					// status because we must ensure that any of the go specific process
4db318
 					// fun such as flushing pipes are complete before we return.
4db318
 					process.Wait()
4db318
-					if h.notifySocket != nil {
4db318
-						h.notifySocket.Close()
4db318
-					}
4db318
 					return e.status, nil
4db318
 				}
4db318
 			}
4db318
diff --git a/start.go b/start.go
4db318
index 2bb698b2..3a1769a4 100644
4db318
--- a/start.go
4db318
+++ b/start.go
4db318
@@ -3,6 +3,7 @@ package main
4db318
 import (
4db318
 	"errors"
4db318
 	"fmt"
4db318
+	"os"
4db318
 
4db318
 	"github.com/opencontainers/runc/libcontainer"
4db318
 	"github.com/urfave/cli"
4db318
@@ -31,7 +32,17 @@ your host.`,
4db318
 		}
4db318
 		switch status {
4db318
 		case libcontainer.Created:
4db318
-			return container.Exec()
4db318
+			notifySocket, err := notifySocketStart(context, os.Getenv("NOTIFY_SOCKET"), container.ID())
4db318
+			if err != nil {
4db318
+				return err
4db318
+			}
4db318
+			if err := container.Exec(); err != nil {
4db318
+				return err
4db318
+			}
4db318
+			if notifySocket != nil {
4db318
+				return notifySocket.waitForContainer(container)
4db318
+			}
4db318
+			return nil
4db318
 		case libcontainer.Stopped:
4db318
 			return errors.New("cannot start a container that has stopped")
4db318
 		case libcontainer.Running:
4db318
diff --git a/utils_linux.go b/utils_linux.go
4db318
index c6a34897..77423f67 100644
4db318
--- a/utils_linux.go
4db318
+++ b/utils_linux.go
4db318
@@ -420,7 +420,9 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
4db318
 
4db318
 	notifySocket := newNotifySocket(context, os.Getenv("NOTIFY_SOCKET"), id)
4db318
 	if notifySocket != nil {
4db318
-		notifySocket.setupSpec(context, spec)
4db318
+		if err := notifySocket.setupSpec(context, spec); err != nil {
4db318
+			return -1, err
4db318
+		}
4db318
 	}
4db318
 
4db318
 	container, err := createContainer(context, id, spec)
4db318
@@ -429,10 +431,16 @@ func startContainer(context *cli.Context, spec *specs.Spec, action CtAct, criuOp
4db318
 	}
4db318
 
4db318
 	if notifySocket != nil {
4db318
-		err := notifySocket.setupSocket()
4db318
+		err := notifySocket.setupSocketDirectory()
4db318
 		if err != nil {
4db318
 			return -1, err
4db318
 		}
4db318
+		if action == CT_ACT_RUN {
4db318
+			err := notifySocket.bindSocket()
4db318
+			if err != nil {
4db318
+				return -1, err
4db318
+			}
4db318
+		}
4db318
 	}
4db318
 
4db318
 	// Support on-demand socket activation by passing file descriptors into the container init process.
4db318
-- 
4db318
2.17.1
4db318