|
|
e293be |
centosplus patch bug#15115
|
|
|
e293be |
|
|
|
e293be |
|
|
|
e293be |
commit 6d8409580bee356ce418dcb94260b24dda639934
|
|
|
e293be |
Author: Michal Hocko <mhocko@suse.com>
|
|
|
e293be |
Date: Mon Dec 12 16:42:23 2016 -0800
|
|
|
e293be |
|
|
|
e293be |
mm, mempolicy: clean up __GFP_THISNODE confusion in policy_zonelist
|
|
|
e293be |
|
|
|
e293be |
__GFP_THISNODE is documented to enforce the allocation to be satisfied
|
|
|
e293be |
from the requested node with no fallbacks or placement policy
|
|
|
e293be |
enforcements. policy_zonelist seemingly breaks this semantic if the
|
|
|
e293be |
current policy is MPOL_BIND and instead of taking the node it will
|
|
|
e293be |
fall back to the first node in the mask if the requested one is not in
|
|
|
e293be |
the mask. This is confusing to say the least because in fact we
|
|
|
e293be |
shouldn't ever go that path. First tasks shouldn't be scheduled on CPUs
|
|
|
e293be |
with nodes outside of their mempolicy binding. And secondly
|
|
|
e293be |
policy_zonelist is called only from 3 places:
|
|
|
e293be |
|
|
|
e293be |
- huge_zonelist - never should do __GFP_THISNODE when going this path
|
|
|
e293be |
|
|
|
e293be |
- alloc_pages_vma - which shouldn't depend on __GFP_THISNODE either
|
|
|
e293be |
|
|
|
e293be |
- alloc_pages_current - which uses default_policy if __GFP_THISNODE is
|
|
|
e293be |
used
|
|
|
e293be |
|
|
|
e293be |
So we shouldn't even need to care about this possibility and can drop
|
|
|
e293be |
the confusing code. Let's keep a WARN_ON_ONCE in place to catch
|
|
|
e293be |
potential users and fix them up properly (aka use a different allocation
|
|
|
e293be |
function which ignores mempolicy).
|
|
|
e293be |
|
|
|
e293be |
[akpm@linux-foundation.org: coding-style fixes]
|
|
|
e293be |
Link: http://lkml.kernel.org/r/20161013125958.32155-1-mhocko@kernel.org
|
|
|
e293be |
Signed-off-by: Michal Hocko <mhocko@suse.com>
|
|
|
e293be |
Acked-by: Vlastimil Babka <vbabka@suse.cz>
|
|
|
e293be |
Cc: Mel Gorman <mgorman@suse.de>
|
|
|
e293be |
Cc: David Rientjes <rientjes@google.com>
|
|
|
e293be |
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
|
|
|
e293be |
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
|
|
|
e293be |
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
|
|
|
e293be |
|
|
|
e293be |
Applied-by: Akemi Yagi <toracat@centos.org>
|
|
|
e293be |
|
|
|
e293be |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
|
|
|
e293be |
index a6a27e5..4d58021 100644
|
|
|
e293be |
--- a/mm/mempolicy.c
|
|
|
e293be |
+++ b/mm/mempolicy.c
|
|
|
e293be |
@@ -1679,25 +1679,17 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy)
|
|
|
e293be |
static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy,
|
|
|
e293be |
int nd)
|
|
|
e293be |
{
|
|
|
e293be |
- switch (policy->mode) {
|
|
|
e293be |
- case MPOL_PREFERRED:
|
|
|
e293be |
- if (!(policy->flags & MPOL_F_LOCAL))
|
|
|
e293be |
- nd = policy->v.preferred_node;
|
|
|
e293be |
- break;
|
|
|
e293be |
- case MPOL_BIND:
|
|
|
e293be |
+ if (policy->mode == MPOL_PREFERRED && !(policy->flags & MPOL_F_LOCAL))
|
|
|
e293be |
+ nd = policy->v.preferred_node;
|
|
|
e293be |
+ else {
|
|
|
e293be |
/*
|
|
|
e293be |
- * Normally, MPOL_BIND allocations are node-local within the
|
|
|
e293be |
- * allowed nodemask. However, if __GFP_THISNODE is set and the
|
|
|
e293be |
- * current node isn't part of the mask, we use the zonelist for
|
|
|
e293be |
- * the first node in the mask instead.
|
|
|
e293be |
+ * __GFP_THISNODE shouldn't even be used with the bind policy
|
|
|
e293be |
+ * because we might easily break the expectation to stay on the
|
|
|
e293be |
+ * requested node and not break the policy.
|
|
|
e293be |
*/
|
|
|
e293be |
- if (unlikely(gfp & __GFP_THISNODE) &&
|
|
|
e293be |
- unlikely(!node_isset(nd, policy->v.nodes)))
|
|
|
e293be |
- nd = first_node(policy->v.nodes);
|
|
|
e293be |
- break;
|
|
|
e293be |
- default:
|
|
|
e293be |
- BUG();
|
|
|
e293be |
+ WARN_ON_ONCE(policy->mode == MPOL_BIND && (gfp & __GFP_THISNODE));
|
|
|
e293be |
}
|
|
|
e293be |
+
|
|
|
e293be |
return node_zonelist(nd, gfp);
|
|
|
e293be |
}
|
|
|
e293be |
|