|
|
001c85 |
2016-01-22 Torvald Riegel <triegel@redhat.com>
|
|
|
001c85 |
|
|
|
001c85 |
* libitm/beginend.cc (GTM::gtm_thread::serial_lock): Put on cacheline
|
|
|
001c85 |
boundary.
|
|
|
001c85 |
(htm_fastpath): Remove.
|
|
|
001c85 |
(gtm_thread::begin_transaction): Fix HTM fastpath.
|
|
|
001c85 |
(_ITM_commitTransaction): Adapt.
|
|
|
001c85 |
(_ITM_commitTransactionEH): Adapt.
|
|
|
001c85 |
* libitm/config/linux/rwlock.h (gtm_rwlock): Add htm_fastpath member
|
|
|
001c85 |
and accessors.
|
|
|
001c85 |
* libitm/config/posix/rwlock.h (gtm_rwlock): Likewise.
|
|
|
001c85 |
* libitm/config/posix/rwlock.cc (gtm_rwlock::gtm_rwlock): Adapt.
|
|
|
001c85 |
* libitm/libitm_i.h (htm_fastpath): Remove declaration.
|
|
|
001c85 |
* libitm/method-serial.cc (htm_mg): Adapt.
|
|
|
001c85 |
(gtm_thread::serialirr_mode): Adapt.
|
|
|
001c85 |
* libitm/query.cc (_ITM_inTransaction, _ITM_getTransactionId): Adapt.
|
|
|
001c85 |
|
|
|
001c85 |
--- libitm/beginend.cc
|
|
|
001c85 |
+++ libitm/beginend.cc
|
|
|
001c85 |
@@ -32,7 +32,11 @@ using namespace GTM;
|
|
|
001c85 |
extern __thread gtm_thread_tls _gtm_thr_tls;
|
|
|
001c85 |
#endif
|
|
|
001c85 |
|
|
|
001c85 |
-gtm_rwlock GTM::gtm_thread::serial_lock;
|
|
|
001c85 |
+// Put this at the start of a cacheline so that serial_lock's writers and
|
|
|
001c85 |
+// htm_fastpath fields are on the same cacheline, so that HW transactions
|
|
|
001c85 |
+// only have to pay one cacheline capacity to monitor both.
|
|
|
001c85 |
+gtm_rwlock GTM::gtm_thread::serial_lock
|
|
|
001c85 |
+ __attribute__((aligned(HW_CACHELINE_SIZE)));
|
|
|
001c85 |
gtm_thread *GTM::gtm_thread::list_of_threads = 0;
|
|
|
001c85 |
unsigned GTM::gtm_thread::number_of_threads = 0;
|
|
|
001c85 |
|
|
|
001c85 |
@@ -54,9 +58,6 @@ static pthread_mutex_t global_tid_lock = PTHREAD_MUTEX_INITIALIZER;
|
|
|
001c85 |
static pthread_key_t thr_release_key;
|
|
|
001c85 |
static pthread_once_t thr_release_once = PTHREAD_ONCE_INIT;
|
|
|
001c85 |
|
|
|
001c85 |
-// See gtm_thread::begin_transaction.
|
|
|
001c85 |
-uint32_t GTM::htm_fastpath = 0;
|
|
|
001c85 |
-
|
|
|
001c85 |
/* Allocate a transaction structure. */
|
|
|
001c85 |
void *
|
|
|
001c85 |
GTM::gtm_thread::operator new (size_t s)
|
|
|
001c85 |
@@ -174,9 +175,11 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
|
|
|
001c85 |
// lock's writer flag and thus abort if another thread is or becomes a
|
|
|
001c85 |
// serial transaction. Therefore, if the fastpath is enabled, then a
|
|
|
001c85 |
// transaction is not executing as a HW transaction iff the serial lock is
|
|
|
001c85 |
- // write-locked. This allows us to use htm_fastpath and the serial lock's
|
|
|
001c85 |
- // writer flag to reliable determine whether the current thread runs a HW
|
|
|
001c85 |
- // transaction, and thus we do not need to maintain this information in
|
|
|
001c85 |
+ // write-locked. Also, HW transactions monitor the fastpath control
|
|
|
001c85 |
+ // variable, so that they will only execute if dispatch_htm is still the
|
|
|
001c85 |
+ // current method group. This allows us to use htm_fastpath and the serial
|
|
|
001c85 |
+ // lock's writers flag to reliably determine whether the current thread runs
|
|
|
001c85 |
+ // a HW transaction, and thus we do not need to maintain this information in
|
|
|
001c85 |
// per-thread state.
|
|
|
001c85 |
// If an uninstrumented code path is not available, we can still run
|
|
|
001c85 |
// instrumented code from a HW transaction because the HTM fastpath kicks
|
|
|
001c85 |
@@ -187,9 +190,14 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
|
|
|
001c85 |
// indeed in serial mode, and HW transactions should never need serial mode
|
|
|
001c85 |
// for any internal changes (e.g., they never abort visibly to the STM code
|
|
|
001c85 |
// and thus do not trigger the standard retry handling).
|
|
|
001c85 |
- if (likely(htm_fastpath && (prop & pr_hasNoAbort)))
|
|
|
001c85 |
+ if (likely(serial_lock.get_htm_fastpath() && (prop & pr_hasNoAbort)))
|
|
|
001c85 |
{
|
|
|
001c85 |
- for (uint32_t t = htm_fastpath; t; t--)
|
|
|
001c85 |
+ // Note that the snapshot of htm_fastpath that we take here could be
|
|
|
001c85 |
+ // outdated, and a different method group than dispatch_htm may have
|
|
|
001c85 |
+ // been chosen in the meantime. Therefore, take care not to touch
|
|
|
001c85 |
+ // anything besides the serial lock, which is independent of method
|
|
|
001c85 |
+ // groups.
|
|
|
001c85 |
+ for (uint32_t t = serial_lock.get_htm_fastpath(); t; t--)
|
|
|
001c85 |
{
|
|
|
001c85 |
uint32_t ret = htm_begin();
|
|
|
001c85 |
if (htm_begin_success(ret))
|
|
|
001c85 |
@@ -197,9 +205,11 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
|
|
|
001c85 |
// We are executing a transaction now.
|
|
|
001c85 |
// Monitor the writer flag in the serial-mode lock, and abort
|
|
|
001c85 |
// if there is an active or waiting serial-mode transaction.
|
|
|
001c85 |
+ // Also checks that htm_fastpath is still nonzero and thus
|
|
|
001c85 |
+ // HW transactions are allowed to run.
|
|
|
001c85 |
// Note that this can also happen due to an enclosing
|
|
|
001c85 |
// serial-mode transaction; we handle this case below.
|
|
|
001c85 |
- if (unlikely(serial_lock.is_write_locked()))
|
|
|
001c85 |
+ if (unlikely(serial_lock.htm_fastpath_disabled()))
|
|
|
001c85 |
htm_abort();
|
|
|
001c85 |
else
|
|
|
001c85 |
// We do not need to set a_saveLiveVariables because of HTM.
|
|
|
001c85 |
@@ -210,9 +220,12 @@ GTM::gtm_thread::begin_transaction (uint32_t prop, const gtm_jmpbuf *jb)
|
|
|
001c85 |
// retrying the transaction will be successful.
|
|
|
001c85 |
if (!htm_abort_should_retry(ret))
|
|
|
001c85 |
break;
|
|
|
001c85 |
+ // Check whether the HTM fastpath has been disabled.
|
|
|
001c85 |
+ if (!serial_lock.get_htm_fastpath())
|
|
|
001c85 |
+ break;
|
|
|
001c85 |
// Wait until any concurrent serial-mode transactions have finished.
|
|
|
001c85 |
// This is an empty critical section, but won't be elided.
|
|
|
001c85 |
- if (serial_lock.is_write_locked())
|
|
|
001c85 |
+ if (serial_lock.htm_fastpath_disabled())
|
|
|
001c85 |
{
|
|
|
001c85 |
tx = gtm_thr();
|
|
|
001c85 |
if (unlikely(tx == NULL))
|
|
|
001c85 |
@@ -618,7 +631,7 @@ _ITM_commitTransaction(void)
|
|
|
001c85 |
// a serial-mode transaction. If we are, then there will be no other
|
|
|
001c85 |
// concurrent serial-mode transaction.
|
|
|
001c85 |
// See gtm_thread::begin_transaction.
|
|
|
001c85 |
- if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
|
|
|
001c85 |
+ if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled()))
|
|
|
001c85 |
{
|
|
|
001c85 |
htm_commit();
|
|
|
001c85 |
return;
|
|
|
001c85 |
@@ -634,7 +647,7 @@ _ITM_commitTransactionEH(void *exc_ptr)
|
|
|
001c85 |
{
|
|
|
001c85 |
#if defined(USE_HTM_FASTPATH)
|
|
|
001c85 |
// See _ITM_commitTransaction.
|
|
|
001c85 |
- if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
|
|
|
001c85 |
+ if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled()))
|
|
|
001c85 |
{
|
|
|
001c85 |
htm_commit();
|
|
|
001c85 |
return;
|
|
|
001c85 |
--- libitm/config/linux/rwlock.h
|
|
|
001c85 |
+++ libitm/config/linux/rwlock.h
|
|
|
001c85 |
@@ -39,16 +39,29 @@ struct gtm_thread;
|
|
|
001c85 |
//
|
|
|
001c85 |
// In this implementation, writers are given highest priority access but
|
|
|
001c85 |
// read-to-write upgrades do not have a higher priority than writers.
|
|
|
001c85 |
+//
|
|
|
001c85 |
+// Do not change the layout of this class; it must remain a POD type with
|
|
|
001c85 |
+// standard layout, and the writers field must be first (i.e., so the
|
|
|
001c85 |
+// assembler code can assume that its address is equal to the address of the
|
|
|
001c85 |
+// respective instance of the class), and htm_fastpath must be second.
|
|
|
001c85 |
|
|
|
001c85 |
class gtm_rwlock
|
|
|
001c85 |
{
|
|
|
001c85 |
- // TODO Put futexes on different cachelines?
|
|
|
001c85 |
std::atomic<int> writers; // Writers' futex.
|
|
|
001c85 |
+ // We put the HTM fastpath control variable here so that HTM fastpath
|
|
|
001c85 |
+ // transactions can check efficiently whether they are allowed to run.
|
|
|
001c85 |
+ // This must be accessed atomically because threads can load this value
|
|
|
001c85 |
+ // when they are neither a registered reader nor writer (i.e., when they
|
|
|
001c85 |
+ // attempt to execute the HTM fastpath).
|
|
|
001c85 |
+ std::atomic<uint32_t> htm_fastpath;
|
|
|
001c85 |
+ // TODO Put these futexes on different cachelines? (writers and htm_fastpath
|
|
|
001c85 |
+ // should remain on the same cacheline.)
|
|
|
001c85 |
std::atomic<int> writer_readers;// A confirmed writer waits here for readers.
|
|
|
001c85 |
std::atomic<int> readers; // Readers wait here for writers (iff true).
|
|
|
001c85 |
|
|
|
001c85 |
public:
|
|
|
001c85 |
- gtm_rwlock() : writers(0), writer_readers(0), readers(0) {};
|
|
|
001c85 |
+ gtm_rwlock() : writers(0), htm_fastpath(0), writer_readers(0), readers(0)
|
|
|
001c85 |
+ { }
|
|
|
001c85 |
|
|
|
001c85 |
void read_lock (gtm_thread *tx);
|
|
|
001c85 |
void read_unlock (gtm_thread *tx);
|
|
|
001c85 |
@@ -59,12 +72,28 @@ class gtm_rwlock
|
|
|
001c85 |
bool write_upgrade (gtm_thread *tx);
|
|
|
001c85 |
void write_upgrade_finish (gtm_thread *tx);
|
|
|
001c85 |
|
|
|
001c85 |
- // Returns true iff there is a concurrent active or waiting writer.
|
|
|
001c85 |
- // This is primarily useful for simple HyTM approaches, and the value being
|
|
|
001c85 |
- // checked is loaded with memory_order_relaxed.
|
|
|
001c85 |
- bool is_write_locked()
|
|
|
001c85 |
+ // Returns true iff there is a concurrent active or waiting writer, or
|
|
|
001c85 |
+ // htm_fastpath is zero. This is primarily useful for simple HyTM
|
|
|
001c85 |
+ // approaches, and the values being checked are loaded with
|
|
|
001c85 |
+ // memory_order_relaxed.
|
|
|
001c85 |
+ bool htm_fastpath_disabled ()
|
|
|
001c85 |
+ {
|
|
|
001c85 |
+ return writers.load (memory_order_relaxed) != 0
|
|
|
001c85 |
+ || htm_fastpath.load (memory_order_relaxed) == 0;
|
|
|
001c85 |
+ }
|
|
|
001c85 |
+
|
|
|
001c85 |
+ // This does not need to return an exact value, hence relaxed MO is
|
|
|
001c85 |
+ // sufficient.
|
|
|
001c85 |
+ uint32_t get_htm_fastpath ()
|
|
|
001c85 |
+ {
|
|
|
001c85 |
+ return htm_fastpath.load (memory_order_relaxed);
|
|
|
001c85 |
+ }
|
|
|
001c85 |
+ // This must only be called while having acquired the write lock, and other
|
|
|
001c85 |
+ // threads do not need to load an exact value; hence relaxed MO is
|
|
|
001c85 |
+ // sufficient.
|
|
|
001c85 |
+ void set_htm_fastpath (uint32_t val)
|
|
|
001c85 |
{
|
|
|
001c85 |
- return writers.load (memory_order_relaxed) != 0;
|
|
|
001c85 |
+ htm_fastpath.store (val, memory_order_relaxed);
|
|
|
001c85 |
}
|
|
|
001c85 |
|
|
|
001c85 |
protected:
|
|
|
001c85 |
--- libitm/config/posix/rwlock.h
|
|
|
001c85 |
+++ libitm/config/posix/rwlock.h
|
|
|
001c85 |
@@ -44,19 +44,32 @@ struct gtm_thread;
|
|
|
001c85 |
//
|
|
|
001c85 |
// In this implementation, writers are given highest priority access but
|
|
|
001c85 |
// read-to-write upgrades do not have a higher priority than writers.
|
|
|
001c85 |
+//
|
|
|
001c85 |
+// Do not change the layout of this class; it must remain a POD type with
|
|
|
001c85 |
+// standard layout, and the summary field must be first (i.e., so the
|
|
|
001c85 |
+// assembler code can assume that its address is equal to the address of the
|
|
|
001c85 |
+// respective instance of the class), and htm_fastpath must be second.
|
|
|
001c85 |
|
|
|
001c85 |
class gtm_rwlock
|
|
|
001c85 |
{
|
|
|
001c85 |
- pthread_mutex_t mutex; // Held if manipulating any field.
|
|
|
001c85 |
- pthread_cond_t c_readers; // Readers wait here
|
|
|
001c85 |
- pthread_cond_t c_writers; // Writers wait here for writers
|
|
|
001c85 |
- pthread_cond_t c_confirmed_writers; // Writers wait here for readers
|
|
|
001c85 |
-
|
|
|
001c85 |
static const unsigned a_writer = 1; // An active writer.
|
|
|
001c85 |
static const unsigned w_writer = 2; // The w_writers field != 0
|
|
|
001c85 |
static const unsigned w_reader = 4; // The w_readers field != 0
|
|
|
001c85 |
|
|
|
001c85 |
std::atomic<unsigned int> summary; // Bitmask of the above.
|
|
|
001c85 |
+
|
|
|
001c85 |
+ // We put the HTM fastpath control variable here so that HTM fastpath
|
|
|
001c85 |
+ // transactions can check efficiently whether they are allowed to run.
|
|
|
001c85 |
+ // This must be accessed atomically because threads can load this value
|
|
|
001c85 |
+ // when they are neither a registered reader nor writer (i.e., when they
|
|
|
001c85 |
+ // attempt to execute the HTM fastpath).
|
|
|
001c85 |
+ std::atomic<uint32_t> htm_fastpath;
|
|
|
001c85 |
+
|
|
|
001c85 |
+ pthread_mutex_t mutex; // Held if manipulating any field.
|
|
|
001c85 |
+ pthread_cond_t c_readers; // Readers wait here
|
|
|
001c85 |
+ pthread_cond_t c_writers; // Writers wait here for writers
|
|
|
001c85 |
+ pthread_cond_t c_confirmed_writers; // Writers wait here for readers
|
|
|
001c85 |
+
|
|
|
001c85 |
unsigned int a_readers; // Nr active readers as observed by a writer
|
|
|
001c85 |
unsigned int w_readers; // Nr waiting readers
|
|
|
001c85 |
unsigned int w_writers; // Nr waiting writers
|
|
|
001c85 |
@@ -74,12 +87,28 @@ class gtm_rwlock
|
|
|
001c85 |
bool write_upgrade (gtm_thread *tx);
|
|
|
001c85 |
void write_upgrade_finish (gtm_thread *tx);
|
|
|
001c85 |
|
|
|
001c85 |
- // Returns true iff there is a concurrent active or waiting writer.
|
|
|
001c85 |
- // This is primarily useful for simple HyTM approaches, and the value being
|
|
|
001c85 |
- // checked is loaded with memory_order_relaxed.
|
|
|
001c85 |
- bool is_write_locked()
|
|
|
001c85 |
+ // Returns true iff there is a concurrent active or waiting writer, or
|
|
|
001c85 |
+ // htm_fastpath is zero. This is primarily useful for simple HyTM
|
|
|
001c85 |
+ // approaches, and the values being checked are loaded with
|
|
|
001c85 |
+ // memory_order_relaxed.
|
|
|
001c85 |
+ bool htm_fastpath_disabled ()
|
|
|
001c85 |
+ {
|
|
|
001c85 |
+ return (summary.load (memory_order_relaxed) & (a_writer | w_writer))
|
|
|
001c85 |
+ || htm_fastpath.load (memory_order_relaxed) == 0;
|
|
|
001c85 |
+ }
|
|
|
001c85 |
+
|
|
|
001c85 |
+ // This does not need to return an exact value, hence relaxed MO is
|
|
|
001c85 |
+ // sufficient.
|
|
|
001c85 |
+ uint32_t get_htm_fastpath ()
|
|
|
001c85 |
+ {
|
|
|
001c85 |
+ return htm_fastpath.load (memory_order_relaxed);
|
|
|
001c85 |
+ }
|
|
|
001c85 |
+ // This must only be called while having acquired the write lock, and other
|
|
|
001c85 |
+ // threads do not need to load an exact value; hence relaxed MO is
|
|
|
001c85 |
+ // sufficient.
|
|
|
001c85 |
+ void set_htm_fastpath (uint32_t val)
|
|
|
001c85 |
{
|
|
|
001c85 |
- return summary.load (memory_order_relaxed) & (a_writer | w_writer);
|
|
|
001c85 |
+ htm_fastpath.store (val, memory_order_relaxed);
|
|
|
001c85 |
}
|
|
|
001c85 |
|
|
|
001c85 |
protected:
|
|
|
001c85 |
--- libitm/config/posix/rwlock.cc
|
|
|
001c85 |
+++ libitm/config/posix/rwlock.cc
|
|
|
001c85 |
@@ -30,11 +30,12 @@ namespace GTM HIDDEN {
|
|
|
001c85 |
// ??? Move this back to the header file when constexpr is implemented.
|
|
|
001c85 |
|
|
|
001c85 |
gtm_rwlock::gtm_rwlock()
|
|
|
001c85 |
- : mutex (PTHREAD_MUTEX_INITIALIZER),
|
|
|
001c85 |
+ : summary (0),
|
|
|
001c85 |
+ htm_fastpath (0),
|
|
|
001c85 |
+ mutex (PTHREAD_MUTEX_INITIALIZER),
|
|
|
001c85 |
c_readers (PTHREAD_COND_INITIALIZER),
|
|
|
001c85 |
c_writers (PTHREAD_COND_INITIALIZER),
|
|
|
001c85 |
c_confirmed_writers (PTHREAD_COND_INITIALIZER),
|
|
|
001c85 |
- summary (0),
|
|
|
001c85 |
a_readers (0),
|
|
|
001c85 |
w_readers (0),
|
|
|
001c85 |
w_writers (0)
|
|
|
001c85 |
--- libitm/libitm_i.h
|
|
|
001c85 |
+++ libitm/libitm_i.h
|
|
|
001c85 |
@@ -336,10 +336,6 @@ extern abi_dispatch *dispatch_htm();
|
|
|
001c85 |
|
|
|
001c85 |
extern gtm_cacheline_mask gtm_mask_stack(gtm_cacheline *, gtm_cacheline_mask);
|
|
|
001c85 |
|
|
|
001c85 |
-// Control variable for the HTM fastpath that uses serial mode as fallback.
|
|
|
001c85 |
-// Non-zero if the HTM fastpath is enabled. See gtm_thread::begin_transaction.
|
|
|
001c85 |
-extern uint32_t htm_fastpath;
|
|
|
001c85 |
-
|
|
|
001c85 |
} // namespace GTM
|
|
|
001c85 |
|
|
|
001c85 |
#endif // LIBITM_I_H
|
|
|
001c85 |
--- libitm/method-serial.cc
|
|
|
001c85 |
+++ libitm/method-serial.cc
|
|
|
001c85 |
@@ -222,13 +222,13 @@ struct htm_mg : public method_group
|
|
|
001c85 |
// Enable the HTM fastpath if the HW is available. The fastpath is
|
|
|
001c85 |
// initially disabled.
|
|
|
001c85 |
#ifdef USE_HTM_FASTPATH
|
|
|
001c85 |
- htm_fastpath = htm_init();
|
|
|
001c85 |
+ gtm_thread::serial_lock.set_htm_fastpath(htm_init());
|
|
|
001c85 |
#endif
|
|
|
001c85 |
}
|
|
|
001c85 |
virtual void fini()
|
|
|
001c85 |
{
|
|
|
001c85 |
// Disable the HTM fastpath.
|
|
|
001c85 |
- htm_fastpath = 0;
|
|
|
001c85 |
+ gtm_thread::serial_lock.set_htm_fastpath(0);
|
|
|
001c85 |
}
|
|
|
001c85 |
};
|
|
|
001c85 |
|
|
|
001c85 |
@@ -288,7 +288,7 @@ GTM::gtm_thread::serialirr_mode ()
|
|
|
001c85 |
#if defined(USE_HTM_FASTPATH)
|
|
|
001c85 |
// HTM fastpath. If we are executing a HW transaction, don't go serial but
|
|
|
001c85 |
// continue. See gtm_thread::begin_transaction.
|
|
|
001c85 |
- if (likely(htm_fastpath && !gtm_thread::serial_lock.is_write_locked()))
|
|
|
001c85 |
+ if (likely(!gtm_thread::serial_lock.htm_fastpath_disabled()))
|
|
|
001c85 |
return;
|
|
|
001c85 |
#endif
|
|
|
001c85 |
|
|
|
001c85 |
--- libitm/query.cc
|
|
|
001c85 |
+++ libitm/query.cc
|
|
|
001c85 |
@@ -49,7 +49,7 @@ _ITM_inTransaction (void)
|
|
|
001c85 |
// a transaction and thus we can't deduce this by looking at just the serial
|
|
|
001c85 |
// lock. This function isn't used in practice currently, so the easiest
|
|
|
001c85 |
// way to handle it is to just abort.
|
|
|
001c85 |
- if (htm_fastpath && htm_transaction_active())
|
|
|
001c85 |
+ if (gtm_thread::serial_lock.get_htm_fastpath() && htm_transaction_active())
|
|
|
001c85 |
htm_abort();
|
|
|
001c85 |
#endif
|
|
|
001c85 |
struct gtm_thread *tx = gtm_thr();
|
|
|
001c85 |
@@ -69,7 +69,7 @@ _ITM_getTransactionId (void)
|
|
|
001c85 |
{
|
|
|
001c85 |
#if defined(USE_HTM_FASTPATH)
|
|
|
001c85 |
// See ITM_inTransaction.
|
|
|
001c85 |
- if (htm_fastpath && htm_transaction_active())
|
|
|
001c85 |
+ if (gtm_thread::serial_lock.get_htm_fastpath() && htm_transaction_active())
|
|
|
001c85 |
htm_abort();
|
|
|
001c85 |
#endif
|
|
|
001c85 |
struct gtm_thread *tx = gtm_thr();
|