| centosplus patch bug#15115 |
| |
| |
| commit 6d8409580bee356ce418dcb94260b24dda639934 |
| Author: Michal Hocko <mhocko@suse.com> |
| Date: Mon Dec 12 16:42:23 2016 -0800 |
| |
| mm, mempolicy: clean up __GFP_THISNODE confusion in policy_zonelist |
| |
| __GFP_THISNODE is documented to enforce the allocation to be satisfied |
| from the requested node with no fallbacks or placement policy |
| enforcements. policy_zonelist seemingly breaks this semantic if the |
| current policy is MPOL_BIND and instead of taking the node it will |
| fallback to the first node in the mask if the requested one is not in |
| the mask. This is confusing to say the least because in fact we |
| shouldn't ever go that path. First, tasks shouldn't be scheduled on CPUs |
| with nodes outside of their mempolicy binding. And secondly |
| policy_zonelist is called only from 3 places: |
| |
| - huge_zonelist - never should do __GFP_THISNODE when going this path |
| |
| - alloc_pages_vma - which shouldn't depend on __GFP_THISNODE either |
| |
| - alloc_pages_current - which uses default_policy if __GFP_THISNODE is |
| used |
| |
| So we shouldn't even need to care about this possibility and can drop |
| the confusing code. Let's keep a WARN_ON_ONCE in place to catch |
| potential users and fix them up properly (aka use a different allocation |
| function which ignores mempolicy). |
| |
| [akpm@linux-foundation.org: coding-style fixes] |
| Link: http://lkml.kernel.org/r/20161013125958.32155-1-mhocko@kernel.org |
| Signed-off-by: Michal Hocko <mhocko@suse.com> |
| Acked-by: Vlastimil Babka <vbabka@suse.cz> |
| Cc: Mel Gorman <mgorman@suse.de> |
| Cc: David Rientjes <rientjes@google.com> |
| Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com> |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| |
| Applied-by: Akemi Yagi <toracat@centos.org> |
| |
| diff --git a/mm/mempolicy.c b/mm/mempolicy.c |
| index a6a27e5..4d58021 100644 |
| --- a/mm/mempolicy.c |
| +++ b/mm/mempolicy.c |
| @@ -1679,25 +1679,17 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy) |
| static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy, |
| int nd) |
| { |
| - switch (policy->mode) { |
| - case MPOL_PREFERRED: |
| - if (!(policy->flags & MPOL_F_LOCAL)) |
| - nd = policy->v.preferred_node; |
| - break; |
| - case MPOL_BIND: |
| + if (policy->mode == MPOL_PREFERRED && !(policy->flags & MPOL_F_LOCAL)) |
| + nd = policy->v.preferred_node; |
| + else { |
| /* |
| - * Normally, MPOL_BIND allocations are node-local within the |
| - * allowed nodemask. However, if __GFP_THISNODE is set and the |
| - * current node isn't part of the mask, we use the zonelist for |
| - * the first node in the mask instead. |
| + * __GFP_THISNODE shouldn't even be used with the bind policy |
| + * because we might easily break the expectation to stay on the |
| + * requested node and not break the policy. |
| */ |
| - if (unlikely(gfp & __GFP_THISNODE) && |
| - unlikely(!node_isset(nd, policy->v.nodes))) |
| - nd = first_node(policy->v.nodes); |
| - break; |
| - default: |
| - BUG(); |
| + WARN_ON_ONCE(policy->mode == MPOL_BIND && (gfp & __GFP_THISNODE)); |
| } |
| + |
| return node_zonelist(nd, gfp); |
| } |
| |