Blob Blame History Raw
From 6c3e409c5de00ba108392be3e6502d5a49c235ce Mon Sep 17 00:00:00 2001
From: Tom Deseyn <tom.deseyn@gmail.com>
Date: Thu, 16 Aug 2018 20:47:20 +0200
Subject: [PATCH] Determine memory load based on cgroup usage. (#19518)

The kernel triggers OOM kills based on cgroup memory usage, which includes
both the RSS and the file cache of the cgroup.

Previously, the implementation used only the process RSS to determine memory
load. The RSS is lower than the cgroup usage, so the GC was not triggered soon
enough, which led to OOM kills.
---
 src/gc/unix/cgroup.cpp      | 31 +++++++++++++++++++++++++++++--
 src/gc/unix/gcenv.unix.cpp  |  4 ++--
 src/pal/inc/pal.h           |  2 +-
 src/pal/src/misc/cgroup.cpp | 30 +++++++++++++++++++++++++++++-
 src/vm/gcenv.os.cpp         |  2 +-
 5 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/src/gc/unix/cgroup.cpp b/src/gc/unix/cgroup.cpp
index f892a339d8a..a3307ce8a53 100644
--- a/src/gc/unix/cgroup.cpp
+++ b/src/gc/unix/cgroup.cpp
@@ -33,6 +33,7 @@ Module Name:
 #define PROC_CGROUP_FILENAME "/proc/self/cgroup"
 #define PROC_STATM_FILENAME "/proc/self/statm"
 #define MEM_LIMIT_FILENAME "/memory.limit_in_bytes"
+#define MEM_USAGE_FILENAME "/memory.usage_in_bytes"
 #define CFS_QUOTA_FILENAME "/cpu.cfs_quota_us"
 #define CFS_PERIOD_FILENAME "/cpu.cfs_period_us"
 
@@ -74,6 +75,27 @@ class CGroup
         return result;
     }
 
+    bool GetPhysicalMemoryUsage(size_t *val) // Reads the memory cgroup's usage file into *val; false if the path is missing or allocation fails, else the reader's result.
+    {
+        char *mem_usage_filename = nullptr;
+        bool result = false;
+
+        if (m_memory_cgroup_path == nullptr) // no memory cgroup path available: report failure
+            return result;
+
+        size_t len = strlen(m_memory_cgroup_path);
+        len += strlen(MEM_USAGE_FILENAME);
+        mem_usage_filename = (char*)malloc(len+1); // +1 for the terminating NUL
+        if (mem_usage_filename == nullptr) // allocation failed: report failure
+            return result;
+
+        strcpy(mem_usage_filename, m_memory_cgroup_path); // full path = m_memory_cgroup_path followed by MEM_USAGE_FILENAME
+        strcat(mem_usage_filename, MEM_USAGE_FILENAME);
+        result = ReadMemoryValueFromFile(mem_usage_filename, val); // helper defined outside this hunk; presumably parses a byte count into *val (confirm)
+        free(mem_usage_filename); // buffer is released whether or not the read succeeded
+        return result;
+    }
+
     bool GetCpuLimit(uint32_t *val)
     {
         long long quota;
@@ -427,19 +449,24 @@ size_t GetRestrictedPhysicalMemoryLimit()
     return physical_memory_limit;
 }
 
-bool GetWorkingSetSize(size_t* val)
+bool GetPhysicalMemoryUsed(size_t* val)
 {
     bool result = false;
     size_t linelen;
     char* line = nullptr;
+    CGroup cgroup;
 
     if (val == nullptr)
         return false;
 
+    // Linux uses cgroup usage to trigger oom kills.
+    if (cgroup.GetPhysicalMemoryUsage(val))
+        return true;
+
+    // process resident set size.
     FILE* file = fopen(PROC_STATM_FILENAME, "r");
     if (file != nullptr && getline(&line, &linelen, file) != -1)
     {
-
         char* context = nullptr;
         char* strTok = strtok_r(line, " ", &context); 
         strTok = strtok_r(nullptr, " ", &context); 
diff --git a/src/gc/unix/gcenv.unix.cpp b/src/gc/unix/gcenv.unix.cpp
index f34dd8993b2..a1e12961ada 100644
--- a/src/gc/unix/gcenv.unix.cpp
+++ b/src/gc/unix/gcenv.unix.cpp
@@ -56,7 +56,7 @@ static uint8_t* g_helperPage = 0;
 static pthread_mutex_t g_flushProcessWriteBuffersMutex;
 
 size_t GetRestrictedPhysicalMemoryLimit();
-bool GetWorkingSetSize(size_t* val);
+bool GetPhysicalMemoryUsed(size_t* val);
 bool GetCpuLimit(uint32_t* val);
 
 static size_t g_RestrictedPhysicalMemoryLimit = 0;
@@ -623,7 +623,7 @@ void GCToOSInterface::GetMemoryStatus(uint32_t* memory_load, uint64_t* available
 
         // Get the physical memory in use - from it, we can get the physical memory available.
         // We do this only when we have the total physical memory available.
-        if (total > 0 && GetWorkingSetSize(&used))
+        if (total > 0 && GetPhysicalMemoryUsed(&used))
         {
             available = total > used ? total-used : 0; 
             load = (uint32_t)(((float)used * 100) / (float)total);
diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h
index 60f4a81c66e..5106c014211 100644
--- a/src/pal/inc/pal.h
+++ b/src/pal/inc/pal.h
@@ -2364,7 +2364,7 @@ PAL_GetRestrictedPhysicalMemoryLimit(VOID);
 PALIMPORT
 BOOL
 PALAPI
-PAL_GetWorkingSetSize(size_t* val);
+PAL_GetPhysicalMemoryUsed(size_t* val);
 
 PALIMPORT
 BOOL
diff --git a/src/pal/src/misc/cgroup.cpp b/src/pal/src/misc/cgroup.cpp
index 7a3a9261a11..145586a0b98 100644
--- a/src/pal/src/misc/cgroup.cpp
+++ b/src/pal/src/misc/cgroup.cpp
@@ -23,6 +23,7 @@ SET_DEFAULT_DEBUG_CHANNEL(MISC);
 #define PROC_CGROUP_FILENAME "/proc/self/cgroup"
 #define PROC_STATM_FILENAME "/proc/self/statm"
 #define MEM_LIMIT_FILENAME "/memory.limit_in_bytes"
+#define MEM_USAGE_FILENAME "/memory.usage_in_bytes"
 #define CFS_QUOTA_FILENAME "/cpu.cfs_quota_us"
 #define CFS_PERIOD_FILENAME "/cpu.cfs_period_us"
 class CGroup
@@ -63,6 +64,27 @@ class CGroup
         return result;
     }
 
+    bool GetPhysicalMemoryUsage(size_t *val) // Reads the memory cgroup's usage file into *val; false if the path is missing or allocation fails, else the reader's result.
+    {
+        char *mem_usage_filename = nullptr;
+        bool result = false;
+
+        if (m_memory_cgroup_path == nullptr) // no memory cgroup path available: report failure
+            return result;
+
+        size_t len = strlen(m_memory_cgroup_path);
+        len += strlen(MEM_USAGE_FILENAME);
+        mem_usage_filename = (char*)malloc(len+1); // +1 for the terminating NUL
+        if (mem_usage_filename == nullptr) // allocation failed: report failure
+            return result;
+
+        strcpy(mem_usage_filename, m_memory_cgroup_path); // full path = m_memory_cgroup_path followed by MEM_USAGE_FILENAME
+        strcat(mem_usage_filename, MEM_USAGE_FILENAME);
+        result = ReadMemoryValueFromFile(mem_usage_filename, val); // helper defined outside this hunk; presumably parses a byte count into *val (confirm)
+        free(mem_usage_filename); // buffer is released whether or not the read succeeded
+        return result;
+    }
+
     bool GetCpuLimit(UINT *val)
     {
         long long quota;
@@ -384,15 +406,21 @@ PAL_GetRestrictedPhysicalMemoryLimit()
 
 BOOL
 PALAPI
-PAL_GetWorkingSetSize(size_t* val)
+PAL_GetPhysicalMemoryUsed(size_t* val)
 {
     BOOL result = false;
     size_t linelen;
     char* line = nullptr;
+    CGroup cgroup;
 
     if (val == nullptr)
         return FALSE;
 
+    // Linux uses cgroup usage to trigger oom kills.
+    if (cgroup.GetPhysicalMemoryUsage(val))
+        return TRUE;
+
+    // process resident set size.
     FILE* file = fopen(PROC_STATM_FILENAME, "r");
     if (file != nullptr && getline(&line, &linelen, file) != -1)
     {
diff --git a/src/vm/gcenv.os.cpp b/src/vm/gcenv.os.cpp
index 70dc2619dd9..99e9ff6c727 100644
--- a/src/vm/gcenv.os.cpp
+++ b/src/vm/gcenv.os.cpp
@@ -605,7 +605,7 @@ void GCToOSInterface::GetMemoryStatus(uint32_t* memory_load, uint64_t* available
             workingSetSize = pmc.WorkingSetSize;
         }
 #else
-        status = PAL_GetWorkingSetSize(&workingSetSize);
+        status = PAL_GetPhysicalMemoryUsed(&workingSetSize);
 #endif
         if(status)
         {