From 78b46ab8595c094da910c338839455d8386d57b8 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 14 Jun 2017 15:27:42 -0700 Subject: [PATCH 1/6] libcontainer/system/proc: Add Stat and Stat_t So we can extract more than the start time with a single read. Signed-off-by: W. Trevor King (cherry picked from commit 439eaa3584402d239297f278cc1f22c08dbdcc17) Signed-off-by: Kir Kolyshkin --- libcontainer/system/proc.go | 35 ++++++++++++++++++++++++++------ libcontainer/system/proc_test.go | 8 ++++---- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/libcontainer/system/proc.go b/libcontainer/system/proc.go index a0e96371..4ffb8331 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/system/proc.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/system/proc.go @@ -1,23 +1,43 @@ package system import ( + "fmt" "io/ioutil" "path/filepath" "strconv" "strings" ) -// look in /proc to find the process start time so that we can verify -// that this pid has started after ourself +// Stat_t represents the information from /proc/[pid]/stat, as +// described in proc(5). +type Stat_t struct { + // StartTime is the number of clock ticks after system boot (since + // Linux 2.6). + StartTime uint64 +} + +// Stat returns a Stat_t instance for the specified process. +func Stat(pid int) (stat Stat_t, err error) { + bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) + if err != nil { + return stat, err + } + data := string(bytes) + stat.StartTime, err = parseStartTime(data) + return stat, err +} + +// GetProcessStartTime is deprecated. Use Stat(pid) and +// Stat_t.StartTime instead. func GetProcessStartTime(pid int) (string, error) { - data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) + stat, err := Stat(pid) if err != nil { return "", err } - return parseStartTime(string(data)) + return fmt.Sprintf("%d", stat.StartTime), nil } -func parseStartTime(stat string) (string, error) { +func parseStartTime(stat string) (uint64, error) { // the starttime is located at pos 22 // from the man page // @@ -39,5 +59,8 @@ func parseStartTime(stat string) (string, error) { // get parts after last `)`: s := strings.Split(stat, ")") parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ") - return parts[22-3], nil // starts at 3 (after the filename pos `2`) + startTimeString := parts[22-3] // starts at 3 (after the filename pos `2`) + var startTime uint64 + fmt.Sscanf(startTimeString, "%d", &startTime) + return startTime, nil } diff --git a/libcontainer/system/proc_test.go b/libcontainer/system/proc_test.go index ba910291..c7615ef9 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/system/proc_test.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/system/proc_test.go @@ -3,10 +3,10 @@ package system import "testing" func TestParseStartTime(t *testing.T) { - data := map[string]string{ - "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": "9126532", - "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": "9214966", - "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": "8722075", + data := map[string]uint64{ + "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": 9126532, + "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": 9214966, + "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": 8722075, } for line, startTime := range data { st, err := parseStartTime(line) From c96dfcb196a08557e0ff434c2c190c288fb1b787 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 14 Jun 2017 15:38:45 -0700 Subject: [PATCH 2/6] libcontainer: Replace GetProcessStartTime with Stat_t.StartTime And convert the various start-time properties from strings to uint64s. This removes all internal consumers of the deprecated GetProcessStartTime function. Signed-off-by: W. Trevor King (cherry picked from commit 75d98b26b7acb0023b990a7305a2553c6dbc12d4) Signed-off-by: Kir Kolyshkin --- libcontainer/container.go | 2 +- libcontainer/container_linux.go | 10 +++++----- libcontainer/container_linux_test.go | 10 +++++----- libcontainer/process_linux.go | 12 +++++++----- libcontainer/restored_process.go | 12 ++++++------ 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/libcontainer/container.go b/libcontainer/container.go index d51b159b..899925dd 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container.go @@ -54,7 +54,7 @@ type BaseState struct { InitProcessPid int `json:"init_process_pid"` // InitProcessStartTime is the init process start time in clock cycles since boot time. - InitProcessStartTime string `json:"init_process_start"` + InitProcessStartTime uint64 `json:"init_process_start"` // Created is the unix timestamp for the creation time of the container in UTC Created time.Time `json:"created"` diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 45688d60..98efcfcc 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux.go @@ -37,7 +37,7 @@ type linuxContainer struct { cgroupManager cgroups.Manager initArgs []string initProcess parentProcess - initProcessStartTime string + initProcessStartTime uint64 criuPath string m sync.Mutex criuVersion int @@ -1117,11 +1117,11 @@ func (c *linuxContainer) refreshState() error { // and a new process has been created with the same pid, in this case, the // container would already be stopped. func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) { - startTime, err := system.GetProcessStartTime(initPid) + stat, err := system.Stat(initPid) if err != nil { - return false, newSystemErrorWithCausef(err, "getting init process %d start time", initPid) + return false, newSystemErrorWithCausef(err, "getting init process %d status", initPid) } - if c.initProcessStartTime != startTime { + if c.initProcessStartTime != stat.StartTime { return false, nil } return true, nil @@ -1174,7 +1174,7 @@ func (c *linuxContainer) isPaused() (bool, error) { func (c *linuxContainer) currentState() (*State, error) { var ( - startTime string + startTime uint64 externalDescriptors []string pid = -1 ) diff --git a/libcontainer/container_linux_test.go b/libcontainer/container_linux_test.go index b69e3449..3fdd4a80 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux_test.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux_test.go @@ -52,7 +52,7 @@ func (m *mockCgroupManager) Freeze(state configs.FreezerState) error { type mockProcess struct { _pid int - started string + started uint64 } func (m *mockProcess) terminate() error { @@ -63,7 +63,7 @@ func (m *mockProcess) pid() int { return m._pid } -func (m *mockProcess) startTime() (string, error) { +func (m *mockProcess) startTime() (uint64, error) { return m.started, nil } @@ -150,7 +150,7 @@ func TestGetContainerState(t *testing.T) { }, initProcess: &mockProcess{ _pid: pid, - started: "010", + started: 10, }, cgroupManager: &mockCgroupManager{ pids: []int{1, 2, 3}, @@ -174,8 +174,8 @@ func TestGetContainerState(t *testing.T) { if state.InitProcessPid != pid { t.Fatalf("expected pid %d but received %d", pid, state.InitProcessPid) } - if state.InitProcessStartTime != "010" { - t.Fatalf("expected process start time 010 but received %s", state.InitProcessStartTime) + if state.InitProcessStartTime != 10 { + t.Fatalf("expected process start time 10 but received %d", state.InitProcessStartTime) } paths := state.CgroupPaths if paths == nil { diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index 53df9fa5..1232a7b2 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/process_linux.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/process_linux.go @@ -33,7 +33,7 @@ type parentProcess interface { wait() (*os.ProcessState, error) // startTime returns the process start time. - startTime() (string, error) + startTime() (uint64, error) signal(os.Signal) error @@ -54,8 +54,9 @@ type setnsProcess struct { rootDir *os.File } -func (p *setnsProcess) startTime() (string, error) { - return system.GetProcessStartTime(p.pid()) +func (p *setnsProcess) startTime() (uint64, error) { + stat, err := system.Stat(p.pid()) + return stat.StartTime, err } func (p *setnsProcess) signal(sig os.Signal) error { @@ -406,8 +407,9 @@ func (p *initProcess) terminate() error { return err } -func (p *initProcess) startTime() (string, error) { - return system.GetProcessStartTime(p.pid()) +func (p *initProcess) startTime() (uint64, error) { + stat, err := system.Stat(p.pid()) + return stat.StartTime, err } func (p *initProcess) sendConfig() error { diff --git a/libcontainer/restored_process.go b/libcontainer/restored_process.go index a96f4ca5..408916ad 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/restored_process.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/restored_process.go @@ -17,20 +17,20 @@ func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) { if err != nil { return nil, err } - started, err := system.GetProcessStartTime(pid) + stat, err := system.Stat(pid) if err != nil { return nil, err } return &restoredProcess{ proc: proc, - processStartTime: started, + processStartTime: stat.StartTime, fds: fds, }, nil } type restoredProcess struct { proc *os.Process - processStartTime string + processStartTime uint64 fds []string } @@ -60,7 +60,7 @@ func (p *restoredProcess) wait() (*os.ProcessState, error) { return st, nil } -func (p *restoredProcess) startTime() (string, error) { +func (p *restoredProcess) startTime() (uint64, error) { return p.processStartTime, nil } @@ -81,7 +81,7 @@ func (p *restoredProcess) setExternalDescriptors(newFds []string) { // a persisted state. type nonChildProcess struct { processPid int - processStartTime string + processStartTime uint64 fds []string } @@ -101,7 +101,7 @@ func (p *nonChildProcess) wait() (*os.ProcessState, error) { return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError) } -func (p *nonChildProcess) startTime() (string, error) { +func (p *nonChildProcess) startTime() (uint64, error) { return p.processStartTime, nil } From 0d15fca2013888bb496b6239274e8a4c69386642 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 14 Jun 2017 16:41:16 -0700 Subject: [PATCH 3/6] libcontainer/system/proc: Add Stat_t.State And Stat_t.PID and Stat_t.Name while we're at it. Then use the new .State property in runType to distinguish between running and zombie/dead processes, since kill(2) does not [1]. With this change we no longer claim Running status for zombie/dead processes. I've also removed the kill(2) call from runType. It was originally added in 13841ef3 (new-api: return the Running state only if the init process is alive, 2014-12-23), but we've been accessing /proc/[pid]/stat since 14e95b2a (Make state detection precise, 2016-07-05, #930), and with the /stat access the kill(2) check is redundant. I also don't see much point to the previously-separate doesInitProcessExist, so I've inlined that logic in runType. It would be nice to distinguish between "/proc/[pid]/stat doesn't exist" and errors parsing its contents, but I've skipped that for the moment. The Running -> Stopped change in checkpoint_test.go is because the post-checkpoint process is a zombie, and with this commit zombie processes are Stopped (and no longer Running). [1]: https://github.com/opencontainers/runc/pull/1483#issuecomment-307527789 Signed-off-by: W. Trevor King (cherry picked from commit 2bea4c897e68475c0e698f7ebec4ba989ea0cda0) Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 33 +------ libcontainer/integration/checkpoint_test.go | 2 +- libcontainer/system/proc.go | 103 ++++++++++++++------ libcontainer/system/proc_test.go | 41 ++++++-- 4 files changed, 114 insertions(+), 65 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 98efcfcc..0d478702 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux.go @@ -1112,40 +1112,17 @@ func (c *linuxContainer) refreshState() error { return c.state.transition(&stoppedState{c: c}) } -// doesInitProcessExist checks if the init process is still the same process -// as the initial one, it could happen that the original process has exited -// and a new process has been created with the same pid, in this case, the -// container would already be stopped. -func (c *linuxContainer) doesInitProcessExist(initPid int) (bool, error) { - stat, err := system.Stat(initPid) - if err != nil { - return false, newSystemErrorWithCausef(err, "getting init process %d status", initPid) - } - if c.initProcessStartTime != stat.StartTime { - return false, nil - } - return true, nil -} - func (c *linuxContainer) runType() (Status, error) { if c.initProcess == nil { return Stopped, nil } pid := c.initProcess.pid() - // return Running if the init process is alive - if err := syscall.Kill(pid, 0); err != nil { - if err == syscall.ESRCH { - // It means the process does not exist anymore, could happen when the - // process exited just when we call the function, we should not return - // error in this case. - return Stopped, nil - } - return Stopped, newSystemErrorWithCausef(err, "sending signal 0 to pid %d", pid) + stat, err := system.Stat(pid) + if err != nil { + return Stopped, nil } - // check if the process is still the original init process. - exist, err := c.doesInitProcessExist(pid) - if !exist || err != nil { - return Stopped, err + if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead { + return Stopped, nil } // check if the process that is running is the init process or the user's process. // this is the difference between the container Running and Created. diff --git a/libcontainer/integration/checkpoint_test.go b/libcontainer/integration/checkpoint_test.go index 5fe09d5e..ea1853f4 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/integration/checkpoint_test.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/integration/checkpoint_test.go @@ -130,7 +130,7 @@ func TestCheckpoint(t *testing.T) { t.Fatal(err) } - if state != libcontainer.Running { + if state != libcontainer.Stopped { t.Fatal("Unexpected state checkpoint: ", state) } diff --git a/libcontainer/system/proc.go b/libcontainer/system/proc.go index 4ffb8331..79232a43 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/system/proc.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/system/proc.go @@ -8,9 +8,55 @@ import ( "strings" ) +// State is the status of a process. +type State rune + +const ( // Only values for Linux 3.14 and later are listed here + Dead State = 'X' + DiskSleep State = 'D' + Running State = 'R' + Sleeping State = 'S' + Stopped State = 'T' + TracingStop State = 't' + Zombie State = 'Z' +) + +// String forms of the state from proc(5)'s documentation for +// /proc/[pid]/status' "State" field. +func (s State) String() string { + switch s { + case Dead: + return "dead" + case DiskSleep: + return "disk sleep" + case Running: + return "running" + case Sleeping: + return "sleeping" + case Stopped: + return "stopped" + case TracingStop: + return "tracing stop" + case Zombie: + return "zombie" + default: + return fmt.Sprintf("unknown (%c)", s) + } +} + // Stat_t represents the information from /proc/[pid]/stat, as -// described in proc(5). +// described in proc(5) with names based on the /proc/[pid]/status +// fields. type Stat_t struct { + // PID is the process ID. + PID uint + + // Name is the command run by the process. + Name string + + // State is the state of the process. + State State + // StartTime is the number of clock ticks after system boot (since // Linux 2.6). StartTime uint64 @@ -22,9 +68,7 @@ func Stat(pid int) (stat Stat_t, err error) { if err != nil { return stat, err } - data := string(bytes) - stat.StartTime, err = parseStartTime(data) - return stat, err + return parseStat(string(bytes)) } // GetProcessStartTime is deprecated. Use Stat(pid) and @@ -37,30 +81,33 @@ func GetProcessStartTime(pid int) (string, error) { return fmt.Sprintf("%d", stat.StartTime), nil } -func parseStartTime(stat string) (uint64, error) { - // the starttime is located at pos 22 - // from the man page - // - // starttime %llu (was %lu before Linux 2.6) - // (22) The time the process started after system boot. In kernels before Linux 2.6, this - // value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks - // (divide by sysconf(_SC_CLK_TCK)). - // - // NOTE: - // pos 2 could contain space and is inside `(` and `)`: - // (2) comm %s - // The filename of the executable, in parentheses. - // This is visible whether or not the executable is - // swapped out. - // - // the following is an example: +func parseStat(data string) (stat Stat_t, err error) { + // From proc(5), field 2 could contain space and is inside `(` and `)`. + // The following is an example: // 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 + i := strings.LastIndex(data, ")") + if i <= 2 || i >= len(data)-1 { + return stat, fmt.Errorf("invalid stat data: %q", data) + } + + parts := strings.SplitN(data[:i], "(", 2) + if len(parts) != 2 { + return stat, fmt.Errorf("invalid stat data: %q", data) + } + + stat.Name = parts[1] + _, err = fmt.Sscanf(parts[0], "%d", &stat.PID) + if err != nil { + return stat, err + } - // get parts after last `)`: - s := strings.Split(stat, ")") - parts := strings.Split(strings.TrimSpace(s[len(s)-1]), " ") - startTimeString := parts[22-3] // starts at 3 (after the filename pos `2`) - var startTime uint64 - fmt.Sscanf(startTimeString, "%d", &startTime) - return startTime, nil + // parts indexes should be offset by 3 from the field number given + // proc(5), because parts is zero-indexed and we've removed fields + // one (PID) and two (Name) in the paren-split. + parts = strings.Split(data[i+2:], " ") + var state int + fmt.Sscanf(parts[3-3], "%c", &state) + stat.State = State(state) + fmt.Sscanf(parts[22-3], "%d", &stat.StartTime) + return stat, nil } diff --git a/libcontainer/system/proc_test.go b/libcontainer/system/proc_test.go index c7615ef9..7e1acc5b 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/system/proc_test.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/system/proc_test.go @@ -3,18 +3,43 @@ package system import "testing" func TestParseStartTime(t *testing.T) { - data := map[string]uint64{ - "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": 9126532, - "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": 9214966, - "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": 8722075, + data := map[string]Stat_t{ + "4902 (gunicorn: maste) S 4885 4902 4902 0 -1 4194560 29683 29929 61 83 78 16 96 17 20 0 1 0 9126532 52965376 1903 18446744073709551615 4194304 7461796 140733928751520 140733928698072 139816984959091 0 0 16781312 137447943 1 0 0 17 3 0 0 9 0 0 9559488 10071156 33050624 140733928758775 140733928758945 140733928758945 140733928759264 0": { + PID: 4902, + Name: "gunicorn: maste", + State: 'S', + StartTime: 9126532, + }, + "9534 (cat) R 9323 9534 9323 34828 9534 4194304 95 0 0 0 0 0 0 0 20 0 1 0 9214966 7626752 168 18446744073709551615 4194304 4240332 140732237651568 140732237650920 140570710391216 0 0 0 0 0 0 0 17 1 0 0 0 0 0 6340112 6341364 21553152 140732237653865 140732237653885 140732237653885 140732237656047 0": { + PID: 9534, + Name: "cat", + State: 'R', + StartTime: 9214966, + }, + + "24767 (irq/44-mei_me) S 2 0 0 0 -1 2129984 0 0 0 0 0 0 0 0 -51 0 1 0 8722075 0 0 18446744073709551615 0 0 0 0 0 0 0 2147483647 0 0 0 0 17 1 50 1 0 0 0 0 0 0 0 0 0 0 0": { + PID: 24767, + Name: "irq/44-mei_me", + State: 'S', + StartTime: 8722075, + }, } - for line, startTime := range data { - st, err := parseStartTime(line) + for line, expected := range data { + st, err := parseStat(line) if err != nil { t.Fatal(err) } - if startTime != st { - t.Fatalf("expected start time %q but received %q", startTime, st) + if st.PID != expected.PID { + t.Fatalf("expected PID %q but received %q", expected.PID, st.PID) + } + if st.State != expected.State { + t.Fatalf("expected state %q but received %q", expected.State, st.State) + } + if st.Name != expected.Name { + t.Fatalf("expected name %q but received %q", expected.Name, st.Name) + } + if st.StartTime != expected.StartTime { + t.Fatalf("expected start time %q but received %q", expected.StartTime, st.StartTime) } } } From 9c340aade60d5b8ca0916b78340a199265129b8d Mon Sep 17 00:00:00 2001 From: Will Martin Date: Mon, 22 Jan 2018 17:03:02 +0000 Subject: [PATCH 4/6] Avoid race when opening exec fifo When starting a container with `runc start` or `runc run`, the stub process (runc[2:INIT]) opens a fifo for writing. Its parent runc process will open the same fifo for reading. In this way, they synchronize. If the stub process exits at the wrong time, the parent runc process will block forever. This can happen when racing 2 runc operations against each other: `runc run/start`, and `runc delete`. It could also happen for other reasons, e.g. the kernel's OOM killer may select the stub process. This commit resolves this race by racing the opening of the exec fifo from the runc parent process against the stub process exiting. If the stub process exits before we open the fifo, we return an error. Another solution is to wait on the stub process. However, it seems it would require more refactoring to avoid calling wait multiple times on the same process, which is an error. Signed-off-by: Craig Furman (cherry picked from commit 8d3e6c9826815cf6f75dbd56c7b5a23fb5666108) Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 69 ++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 0d478702..9e128fec 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux.go @@ -5,6 +5,7 @@ package libcontainer import ( "bytes" "encoding/json" + "errors" "fmt" "io" "io/ioutil" @@ -206,20 +207,70 @@ func (c *linuxContainer) Exec() error { func (c *linuxContainer) exec() error { path := filepath.Join(c.root, execFifoFilename) - f, err := os.OpenFile(path, os.O_RDONLY, 0) - if err != nil { - return newSystemErrorWithCause(err, "open exec fifo for reading") + + fifoOpen := make(chan struct{}) + select { + case <-awaitProcessExit(c.initProcess.pid(), fifoOpen): + return errors.New("container process is already dead") + case result := <-awaitFifoOpen(path): + close(fifoOpen) + if result.err != nil { + return result.err + } + f := result.file + defer f.Close() + if err := readFromExecFifo(f); err != nil { + return err + } + return os.Remove(path) } - defer f.Close() - data, err := ioutil.ReadAll(f) +} + +func readFromExecFifo(execFifo io.Reader) error { + data, err := ioutil.ReadAll(execFifo) if err != nil { return err } - if len(data) > 0 { - os.Remove(path) - return nil + if len(data) <= 0 { + return fmt.Errorf("cannot start an already running container") } - return fmt.Errorf("cannot start an already running container") + return nil +} + +func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} { + isDead := make(chan struct{}) + go func() { + for { + select { + case <-exit: + return + case <-time.After(time.Millisecond * 100): + stat, err := system.Stat(pid) + if err != nil || stat.State == system.Zombie { + close(isDead) + return + } + } + } + }() + return isDead +} + +func awaitFifoOpen(path string) <-chan openResult { + fifoOpened := make(chan openResult) + go func() { + f, err := os.OpenFile(path, os.O_RDONLY, 0) + if err != nil { + fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")} + } + fifoOpened <- openResult{file: f} + }() + return fifoOpened +} + +type openResult struct { + file *os.File + err error } func (c *linuxContainer) start(process *Process) error { From aba6dc87301b63d06691d7f867bdebf2b291fac1 Mon Sep 17 00:00:00 2001 From: Ed King Date: Tue, 23 Jan 2018 10:46:31 +0000 Subject: [PATCH 5/6] Return from goroutine when it should terminate Signed-off-by: Craig Furman (cherry picked from commit 5c0af14bf8925b845d91cb94fc1d5ab18140a81a) Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 1 + 1 file changed, 1 insertion(+) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 9e128fec..d5a30496 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux.go @@ -262,6 +262,7 @@ func awaitFifoOpen(path string) <-chan openResult { f, err := os.OpenFile(path, os.O_RDONLY, 0) if err != nil { fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")} + return } fifoOpened <- openResult{file: f} }() From 9471a96c113b05f0a0a84c9afd8e3979e7409508 Mon Sep 17 00:00:00 2001 From: Jordan Liggitt Date: Wed, 18 Dec 2019 15:20:53 +0000 Subject: [PATCH 6/6] Fix race checking for process exit and waiting for exec fifo Signed-off-by: Jordan Liggitt (cherry picked from commit 8541d9cf3d8b6d46614cc41aaf38eaa2419549df) Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 83 +++++++++++++++++---------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index d5a30496..fda163da 100644 --- docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux.go +++ docker-7d71120b1f31f8bb4da733b5f1e5960b434696de/runc-8891bca22c049cd2dcf13ba2438c0bac8d7f3343/libcontainer/container_linux.go @@ -207,22 +207,24 @@ func (c *linuxContainer) Exec() error { func (c *linuxContainer) exec() error { path := filepath.Join(c.root, execFifoFilename) - - fifoOpen := make(chan struct{}) - select { - case <-awaitProcessExit(c.initProcess.pid(), fifoOpen): - return errors.New("container process is already dead") - case result := <-awaitFifoOpen(path): - close(fifoOpen) - if result.err != nil { - return result.err - } - f := result.file - defer f.Close() - if err := readFromExecFifo(f); err != nil { - return err + pid := c.initProcess.pid() + blockingFifoOpenCh := awaitFifoOpen(path) + for { + select { + case result := <-blockingFifoOpenCh: + return handleFifoResult(result) + + case <-time.After(time.Millisecond * 100): + stat, err := system.Stat(pid) + if err != nil || stat.State == system.Zombie { + // could be because process started, ran, and completed between our 100ms timeout and our system.Stat() check. + // see if the fifo exists and has data (with a non-blocking open, which will succeed if the writing process is complete). + if err := handleFifoResult(fifoOpen(path, false)); err != nil { + return errors.New("container process is already dead") + } + return nil + } } - return os.Remove(path) } } @@ -237,38 +239,39 @@ func readFromExecFifo(execFifo io.Reader) error { return nil } -func awaitProcessExit(pid int, exit <-chan struct{}) <-chan struct{} { - isDead := make(chan struct{}) - go func() { - for { - select { - case <-exit: - return - case <-time.After(time.Millisecond * 100): - stat, err := system.Stat(pid) - if err != nil || stat.State == system.Zombie { - close(isDead) - return - } - } - } - }() - return isDead -} - func awaitFifoOpen(path string) <-chan openResult { fifoOpened := make(chan openResult) go func() { - f, err := os.OpenFile(path, os.O_RDONLY, 0) - if err != nil { - fifoOpened <- openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")} - return - } - fifoOpened <- openResult{file: f} + result := fifoOpen(path, true) + fifoOpened <- result }() return fifoOpened } +func fifoOpen(path string, block bool) openResult { + flags := os.O_RDONLY + if !block { + flags |= syscall.O_NONBLOCK + } + f, err := os.OpenFile(path, flags, 0) + if err != nil { + return openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")} + } + return openResult{file: f} +} + +func handleFifoResult(result openResult) error { + if result.err != nil { + return result.err + } + f := result.file + defer f.Close() + if err := readFromExecFifo(f); err != nil { + return err + } + return os.Remove(f.Name()) +} + type openResult struct { file *os.File err error