diff --git a/SOURCES/0021-btl-vader-ensure-that-the-send-tag-is-always-written.patch b/SOURCES/0021-btl-vader-ensure-that-the-send-tag-is-always-written.patch new file mode 100644 index 0000000..90439ab --- /dev/null +++ b/SOURCES/0021-btl-vader-ensure-that-the-send-tag-is-always-written.patch @@ -0,0 +1,63 @@ +From 1fa5e66dbc1aac73221e5c4c9f9be899921706aa Mon Sep 17 00:00:00 2001 +From: Nathan Hjelm +Date: Tue, 11 Sep 2018 10:27:31 -0600 +Subject: [PATCH 21/52] btl/vader: ensure that the send tag is always written + last + +To ensure fast box entries are complete when processed by the +receiving process the tag must be written last. This includes a zero +header for the next fast box entry (in some cases). This commit fixes +two instances where the tag was written too early. In one case, on +32-bit systems it is possible for the tag part of the header to be +written before the size. The second instance is an ordering issue. The +zero header was being written after the fastbox header. + +Fixes #5375, #5638 + +Signed-off-by: Nathan Hjelm +(cherry picked from commit 850fbff441756b2f9cde1007ead3e37ce22599c2) +Signed-off-by: Nathan Hjelm +--- + opal/mca/btl/vader/btl_vader_fbox.h | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h +index 7c0cdd5..df1fb92 100644 +--- a/opal/mca/btl/vader/btl_vader_fbox.h ++++ b/opal/mca/btl/vader/btl_vader_fbox.h +@@ -50,9 +50,10 @@ void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, mca_btl_base_endp + static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr, uint16_t tag, + uint16_t seq, uint32_t size) + { +- mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag, .seq = seq, .size = size}}; +- opal_atomic_wmb (); ++ mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = 0, .seq = seq, .size = size}}; + hdr->ival = tmp.ival; ++ opal_atomic_wmb (); ++ hdr->data.tag = tag; + } + + /* attempt to reserve a 
contiguous segment from the remote ep */ +@@ -138,9 +139,6 @@ static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t *ep, unsign + memcpy (data + header_size, payload, payload_size); + } + +- /* write out part of the header now. the tag will be written when the data is available */ +- mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), tag, ep->fbox_out.seq++, data_size); +- + end += size; + + if (OPAL_UNLIKELY(fbox_size == end)) { +@@ -152,6 +150,9 @@ static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t *ep, unsign + MCA_BTL_VADER_FBOX_HDR(ep->fbox_out.buffer + end)->ival = 0; + } + ++ /* write out part of the header now. the tag will be written when the data is available */ ++ mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), tag, ep->fbox_out.seq++, data_size); ++ + /* align the buffer */ + ep->fbox_out.end = ((uint32_t) hbs << 31) | end; + opal_atomic_wmb (); +-- +1.8.3.1 + diff --git a/SOURCES/0038-btl-vader-ensure-the-fast-box-tag-is-always-read-fir.patch b/SOURCES/0038-btl-vader-ensure-the-fast-box-tag-is-always-read-fir.patch new file mode 100644 index 0000000..2837f4b --- /dev/null +++ b/SOURCES/0038-btl-vader-ensure-the-fast-box-tag-is-always-read-fir.patch @@ -0,0 +1,49 @@ +From e43320ddc3a316e3410d6dde56062cf8968ea284 Mon Sep 17 00:00:00 2001 +From: Nathan Hjelm +Date: Tue, 2 Oct 2018 15:52:45 -0600 +Subject: [PATCH 38/52] btl/vader: ensure the fast box tag is always read first + +On some platforms reading a 64-bit value is non-atomic and it is +possible that the two 32-bit values are read in the wrong order. To +ensure the tag is always read first this commit reads the tag before +reading the full 64-bit value. 
+ +Signed-off-by: Nathan Hjelm +(cherry picked from commit 66a7dc4c72cb25df67e7f872bee7a20b5fa9c763) +--- + opal/mca/btl/vader/btl_vader_fbox.h | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h +index df1fb92..6df9a2d 100644 +--- a/opal/mca/btl/vader/btl_vader_fbox.h ++++ b/opal/mca/btl/vader/btl_vader_fbox.h +@@ -56,6 +56,16 @@ static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr, + hdr->data.tag = tag; + } + ++static inline mca_btl_vader_fbox_hdr_t mca_btl_vader_fbox_read_header (mca_btl_vader_fbox_hdr_t *hdr) ++{ ++ mca_btl_vader_fbox_hdr_t tmp; ++ uint16_t tag = hdr->data.tag; ++ opal_atomic_rmb (); ++ tmp.ival = hdr->ival; ++ tmp.data.tag = tag; ++ return tmp; ++} ++ + /* attempt to reserve a contiguous segment from the remote ep */ + static inline bool mca_btl_vader_fbox_sendi (mca_btl_base_endpoint_t *ep, unsigned char tag, + void * restrict header, const size_t header_size, +@@ -175,7 +185,7 @@ static inline bool mca_btl_vader_check_fboxes (void) + int poll_count; + + for (poll_count = 0 ; poll_count <= MCA_BTL_VADER_POLL_COUNT ; ++poll_count) { +- const mca_btl_vader_fbox_hdr_t hdr = {.ival = MCA_BTL_VADER_FBOX_HDR(ep->fbox_in.buffer + start)->ival}; ++ const mca_btl_vader_fbox_hdr_t hdr = mca_btl_vader_fbox_read_header (MCA_BTL_VADER_FBOX_HDR(ep->fbox_in.buffer + start)); + + /* check for a valid tag a sequence number */ + if (0 == hdr.data.tag || hdr.data.seq != ep->fbox_in.seq) { +-- +1.8.3.1 + diff --git a/SOURCES/0040-btl-vader-fix-race-condition-in-writing-header.patch b/SOURCES/0040-btl-vader-fix-race-condition-in-writing-header.patch new file mode 100644 index 0000000..16ecbba --- /dev/null +++ b/SOURCES/0040-btl-vader-fix-race-condition-in-writing-header.patch @@ -0,0 +1,58 @@ +From 5d658937ce874422c66ac4c7d9723df8bb69ecd7 Mon Sep 17 00:00:00 2001 +From: Nathan Hjelm +Date: Fri, 5 Oct 2018 16:30:06 -0600 +Subject: 
[PATCH 40/52] btl/vader: fix race condition in writing header + +Signed-off-by: Nathan Hjelm +(cherry picked from commit 8291f6722d890efd15333bf7b26f0d07952fa41e) +Signed-off-by: Nathan Hjelm +--- + opal/mca/btl/vader/btl_vader_fbox.h | 19 ++++++++++++------- + 1 file changed, 12 insertions(+), 7 deletions(-) + +diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h +index 6df9a2d..4ebfde8 100644 +--- a/opal/mca/btl/vader/btl_vader_fbox.h ++++ b/opal/mca/btl/vader/btl_vader_fbox.h +@@ -29,6 +29,10 @@ typedef union mca_btl_vader_fbox_hdr_t { + /** sequence number */ + uint16_t seq; + } data; ++ struct { ++ uint32_t value0; ++ uint32_t value1; ++ } data_i32; + uint64_t ival; + } mca_btl_vader_fbox_hdr_t; + +@@ -50,19 +54,20 @@ void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, mca_btl_base_endp + static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr, uint16_t tag, + uint16_t seq, uint32_t size) + { +- mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = 0, .seq = seq, .size = size}}; +- hdr->ival = tmp.ival; ++ mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag, .seq = seq, .size = size}}; ++ /* clear out existing tag/seq */ ++ hdr->data_i32.value1 = 0; ++ opal_atomic_wmb (); ++ hdr->data_i32.value0 = size; + opal_atomic_wmb (); +- hdr->data.tag = tag; ++ hdr->data_i32.value1 = tmp.data_i32.value1; + } + + static inline mca_btl_vader_fbox_hdr_t mca_btl_vader_fbox_read_header (mca_btl_vader_fbox_hdr_t *hdr) + { +- mca_btl_vader_fbox_hdr_t tmp; +- uint16_t tag = hdr->data.tag; ++ mca_btl_vader_fbox_hdr_t tmp = {.data_i32 = {.value1 = hdr->data_i32.value1}};; + opal_atomic_rmb (); +- tmp.ival = hdr->ival; +- tmp.data.tag = tag; ++ tmp.data_i32.value0 = hdr->data_i32.value0; + return tmp; + } + +-- +1.8.3.1 + diff --git a/SPECS/openmpi.spec b/SPECS/openmpi.spec index 3989c22..324a16c 100644 --- a/SPECS/openmpi.spec +++ b/SPECS/openmpi.spec @@ -40,6 +40,10 @@ Source3: macros.openmpi Patch0: 
openmpi-2.1.1-disable-fifo-test.patch Patch1: fix-optimization-flags.patch +Patch9997: 0021-btl-vader-ensure-that-the-send-tag-is-always-written.patch +Patch9998: 0038-btl-vader-ensure-the-fast-box-tag-is-always-read-fir.patch +Patch9999: 0040-btl-vader-fix-race-condition-in-writing-header.patch + BuildRequires: gcc-gfortran %ifnarch s390 s390x BuildRequires: valgrind-devel @@ -140,6 +144,11 @@ OpenMPI support for Python 3. %patch0 -p1 %endif %patch1 -p1 +%ifarch %{arm} +%patch9997 -p1 +%patch9998 -p1 +%patch9999 -p1 +%endif %build %set_build_flags @@ -292,6 +301,9 @@ make check %changelog +* Tue Jun 04 2019 Pablo Greco 3.1.2-5 +- Fix problems with fftw in armhfp + * Tue Sep 25 2018 Jarod Wilson - 3.1.2-5 - Update BR: opensm-devel min version and rebuild against opensm 3.3.21 - Resolves: rhbz#1630653