f7590e
diff --git a/include/private/gcconfig.h b/include/private/gcconfig.h
f7590e
index 767859c..ec92882 100644
f7590e
--- a/include/private/gcconfig.h
f7590e
+++ b/include/private/gcconfig.h
f7590e
@@ -70,6 +70,13 @@
f7590e
 #    define I386
f7590e
 #    define mach_type_known
f7590e
 # endif
f7590e
+# if defined(__aarch64__)
f7590e
+#    define AARCH64
f7590e
+#    if !defined(LINUX)
f7590e
+#      define NOSYS
f7590e
+#      define mach_type_known
f7590e
+#    endif
f7590e
+# endif
f7590e
 # if defined(__arm) || defined(__arm__) || defined(__thumb__)
f7590e
 #    define ARM32
f7590e
 #    if !defined(LINUX) && !defined(NETBSD) && !defined(OPENBSD) \
f7590e
@@ -250,6 +257,10 @@
f7590e
 #    define IA64
f7590e
 #    define mach_type_known
f7590e
 # endif
f7590e
+# if defined(LINUX) && defined(__aarch64__)
f7590e
+#    define AARCH64
f7590e
+#    define mach_type_known
f7590e
+# endif
f7590e
 # if defined(LINUX) && (defined(__arm) || defined(__arm__))
f7590e
 #    define ARM32
f7590e
 #    define mach_type_known
f7590e
@@ -537,6 +548,7 @@
f7590e
                     /*                  running Amdahl UTS4             */
f7590e
                     /*             S390       ==> 390-like machine      */
f7590e
                     /*                  running LINUX                   */
f7590e
+                    /*             AARCH64    ==> ARM AArch64           */
f7590e
                     /*             ARM32      ==> Intel StrongARM       */
f7590e
                     /*             IA64       ==> Intel IPF             */
f7590e
                     /*                            (e.g. Itanium)        */
f7590e
@@ -1899,6 +1911,31 @@
f7590e
 #   endif
f7590e
 # endif
f7590e
 
f7590e
+# ifdef AARCH64
f7590e
+#   define CPP_WORDSZ 64
f7590e
+#   define MACH_TYPE "AARCH64"
f7590e
+#   define ALIGNMENT 8
f7590e
+#   ifndef HBLKSIZE
f7590e
+#     define HBLKSIZE 4096
f7590e
+#   endif
f7590e
+#   ifdef LINUX
f7590e
+#     define OS_TYPE "LINUX"
f7590e
+#     define LINUX_STACKBOTTOM
f7590e
+#     define DYNAMIC_LOADING
f7590e
+      extern int __data_start[];
f7590e
+#     define DATASTART ((ptr_t)__data_start)
f7590e
+      extern char _end[];
f7590e
+#     define DATAEND ((ptr_t)(&_end))
f7590e
+#   endif
f7590e
+#   ifdef NOSYS
f7590e
+      /* __data_start is usually defined in the target linker script.   */
f7590e
+      extern int __data_start[];
f7590e
+#     define DATASTART ((ptr_t)__data_start)
f7590e
+      extern void *__stack_base__;
f7590e
+#     define STACKBOTTOM ((ptr_t)__stack_base__)
f7590e
+#   endif
f7590e
+# endif
f7590e
+
f7590e
 # ifdef ARM32
f7590e
 #   define CPP_WORDSZ 32
f7590e
 #   define MACH_TYPE "ARM32"
f7590e
diff --git a/libatomic_ops/src/atomic_ops.h b/libatomic_ops/src/atomic_ops.h
f7590e
index db177d5..d91da53 100644
f7590e
--- a/libatomic_ops/src/atomic_ops.h
f7590e
+++ b/libatomic_ops/src/atomic_ops.h
f7590e
@@ -244,6 +244,10 @@
f7590e
      || defined(__powerpc64__) || defined(__ppc64__)
f7590e
 #   include "atomic_ops/sysdeps/gcc/powerpc.h"
f7590e
 # endif /* __powerpc__ */
f7590e
+# if defined(__aarch64__)
f7590e
+#   include "atomic_ops/sysdeps/gcc/aarch64.h"
f7590e
+#   define AO_CAN_EMUL_CAS
f7590e
+# endif /* __aarch64__ */
f7590e
 # if defined(__arm__) && !defined(AO_USE_PTHREAD_DEFS)
f7590e
 #   include "atomic_ops/sysdeps/gcc/arm.h"
f7590e
 #   define AO_CAN_EMUL_CAS
f7590e
diff --git a/libatomic_ops/src/atomic_ops/sysdeps/Makefile.am b/libatomic_ops/src/atomic_ops/sysdeps/Makefile.am
f7590e
index d8b24dc..b73a20c 100644
f7590e
--- a/libatomic_ops/src/atomic_ops/sysdeps/Makefile.am
f7590e
+++ b/libatomic_ops/src/atomic_ops/sysdeps/Makefile.am
f7590e
@@ -30,6 +30,7 @@ nobase_sysdep_HEADERS= generic_pthread.h \
f7590e
 	  gcc/hexagon.h gcc/hppa.h gcc/ia64.h gcc/m68k.h \
f7590e
 	  gcc/mips.h gcc/powerpc.h gcc/s390.h \
f7590e
 	  gcc/sh.h gcc/sparc.h gcc/x86.h gcc/x86_64.h \
f7590e
+	  gcc/aarch64.h \
f7590e
 	\
f7590e
 	  hpc/hppa.h hpc/ia64.h \
f7590e
 	\
f7590e
diff --git a/libatomic_ops/src/atomic_ops/sysdeps/gcc/aarch64.h b/libatomic_ops/src/atomic_ops/sysdeps/gcc/aarch64.h
f7590e
new file mode 100644
f7590e
index 0000000..94f1f14
f7590e
--- /dev/null
f7590e
+++ b/libatomic_ops/src/atomic_ops/sysdeps/gcc/aarch64.h
f7590e
@@ -0,0 +1,353 @@
f7590e
+/*
f7590e
+ * Copyright (c) 1991-1994 by Xerox Corporation.  All rights reserved.
f7590e
+ * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
f7590e
+ * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
f7590e
+ *
f7590e
+ *
f7590e
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
f7590e
+ * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
f7590e
+ *
f7590e
+ * Permission is hereby granted to use or copy this program
f7590e
+ * for any purpose,  provided the above notices are retained on all copies.
f7590e
+ * Permission to modify the code and to distribute modified code is granted,
f7590e
+ * provided the above notices are retained, and a notice that the code was
f7590e
+ * modified is included with the above copyright notice.
f7590e
+ *
f7590e
+ */
f7590e
+
f7590e
+#include "../read_ordered.h"
f7590e
+
f7590e
+#include "../test_and_set_t_is_ao_t.h"
f7590e
+
f7590e
+#include "../standard_ao_double_t.h"
f7590e
+
f7590e
+#ifndef AO_UNIPROCESSOR
f7590e
+  AO_INLINE void
f7590e
+  AO_nop_write(void)
f7590e
+  {
f7590e
+    __asm__ __volatile__("dmb st" : : : "memory");
f7590e
+  }
f7590e
+# define AO_HAVE_nop_write
f7590e
+#endif
f7590e
+
f7590e
+#ifndef AO_EXPECT_FALSE
f7590e
+#if __GNUC__ >= 3 && !defined(LINT2)
f7590e
+# define AO_EXPECT_FALSE(expr) __builtin_expect(expr, 0)
f7590e
+  /* Equivalent to (expr) but predict that usually (expr) == 0. */
f7590e
+#else
f7590e
+# define AO_EXPECT_FALSE(expr) (expr)
f7590e
+#endif /* __GNUC__ < 3 || LINT2 */
f7590e
+#endif
f7590e
+
f7590e
+/* TODO: Adjust version check once GCC fixes double-wide AO support.   */
f7590e
+#if __GNUC__ == 4
f7590e
+
f7590e
+  AO_INLINE AO_double_t
f7590e
+  AO_double_load(const volatile AO_double_t *addr)
f7590e
+  {
f7590e
+    AO_double_t result;
f7590e
+    int status;
f7590e
+
f7590e
+    /* Note that STXP cannot be discarded because LD[A]XP is not        */
f7590e
+    /* single-copy atomic (unlike LDREXD for 32-bit ARM).               */
f7590e
+    do {
f7590e
+      __asm__ __volatile__("//AO_double_load\n"
f7590e
+      "       ldxp  %0, %1, %3\n"
f7590e
+      "       stxp %w2, %0, %1, %3"
f7590e
+      : "=&r" (result.AO_val1), "=&r" (result.AO_val2), "=&r" (status)
f7590e
+      : "Q" (*addr));
f7590e
+    } while (AO_EXPECT_FALSE(status));
f7590e
+    return result;
f7590e
+  }
f7590e
+# define AO_HAVE_double_load
f7590e
+
f7590e
+  AO_INLINE AO_double_t
f7590e
+  AO_double_load_acquire(const volatile AO_double_t *addr)
f7590e
+  {
f7590e
+    AO_double_t result;
f7590e
+    int status;
f7590e
+
f7590e
+    do {
f7590e
+      __asm__ __volatile__("//AO_double_load_acquire\n"
f7590e
+      "       ldaxp  %0, %1, %3\n"
f7590e
+      "       stxp %w2, %0, %1, %3"
f7590e
+      : "=&r" (result.AO_val1), "=&r" (result.AO_val2), "=&r" (status)
f7590e
+      : "Q" (*addr));
f7590e
+    } while (AO_EXPECT_FALSE(status));
f7590e
+    return result;
f7590e
+  }
f7590e
+# define AO_HAVE_double_load_acquire
f7590e
+
f7590e
+  AO_INLINE void
f7590e
+  AO_double_store(volatile AO_double_t *addr, AO_double_t value)
f7590e
+  {
f7590e
+    AO_double_t old_val;
f7590e
+    int status;
f7590e
+
f7590e
+    do {
f7590e
+      __asm__ __volatile__("//AO_double_store\n"
f7590e
+      "       ldxp  %0, %1, %3\n"
f7590e
+      "       stxp %w2, %4, %5, %3"
f7590e
+      : "=&r" (old_val.AO_val1), "=&r" (old_val.AO_val2), "=&r" (status),
f7590e
+        "=Q" (*addr)
f7590e
+      : "r" (value.AO_val1), "r" (value.AO_val2));
f7590e
+      /* Compared to the arm.h implementation, the 'cc' (flags) are not */
f7590e
+      /* clobbered because A64 has no concept of conditional execution. */
f7590e
+    } while (AO_EXPECT_FALSE(status));
f7590e
+  }
f7590e
+# define AO_HAVE_double_store
f7590e
+
f7590e
+  AO_INLINE void
f7590e
+  AO_double_store_release(volatile AO_double_t *addr, AO_double_t value)
f7590e
+  {
f7590e
+    AO_double_t old_val;
f7590e
+    int status;
f7590e
+
f7590e
+    do {
f7590e
+      __asm__ __volatile__("//AO_double_store_release\n"
f7590e
+      "       ldxp  %0, %1, %3\n"
f7590e
+      "       stlxp %w2, %4, %5, %3"
f7590e
+      : "=&r" (old_val.AO_val1), "=&r" (old_val.AO_val2), "=&r" (status),
f7590e
+        "=Q" (*addr)
f7590e
+      : "r" (value.AO_val1), "r" (value.AO_val2));
f7590e
+    } while (AO_EXPECT_FALSE(status));
f7590e
+  }
f7590e
+# define AO_HAVE_double_store_release
f7590e
+
f7590e
+  AO_INLINE int
f7590e
+  AO_double_compare_and_swap(volatile AO_double_t *addr,
f7590e
+                             AO_double_t old_val, AO_double_t new_val)
f7590e
+  {
f7590e
+    AO_double_t tmp;
f7590e
+    int result = 1;
f7590e
+
f7590e
+    do {
f7590e
+      __asm__ __volatile__("//AO_double_compare_and_swap\n"
f7590e
+        "       ldxp  %0, %1, %2\n"
f7590e
+        : "=&r" (tmp.AO_val1), "=&r" (tmp.AO_val2)
f7590e
+        : "Q" (*addr));
f7590e
+      if (tmp.AO_val1 != old_val.AO_val1 || tmp.AO_val2 != old_val.AO_val2)
f7590e
+        break;
f7590e
+      __asm__ __volatile__(
f7590e
+        "       stxp %w0, %2, %3, %1\n"
f7590e
+        : "=&r" (result), "=Q" (*addr)
f7590e
+        : "r" (new_val.AO_val1), "r" (new_val.AO_val2));
f7590e
+    } while (AO_EXPECT_FALSE(result));
f7590e
+    return !result;
f7590e
+  }
f7590e
+# define AO_HAVE_double_compare_and_swap
f7590e
+
f7590e
+  AO_INLINE int
f7590e
+  AO_double_compare_and_swap_acquire(volatile AO_double_t *addr,
f7590e
+                                     AO_double_t old_val, AO_double_t new_val)
f7590e
+  {
f7590e
+    AO_double_t tmp;
f7590e
+    int result = 1;
f7590e
+
f7590e
+    do {
f7590e
+      __asm__ __volatile__("//AO_double_compare_and_swap_acquire\n"
f7590e
+        "       ldaxp  %0, %1, %2\n"
f7590e
+        : "=&r" (tmp.AO_val1), "=&r" (tmp.AO_val2)
f7590e
+        : "Q" (*addr));
f7590e
+      if (tmp.AO_val1 != old_val.AO_val1 || tmp.AO_val2 != old_val.AO_val2)
f7590e
+        break;
f7590e
+      __asm__ __volatile__(
f7590e
+        "       stxp %w0, %2, %3, %1\n"
f7590e
+        : "=&r" (result), "=Q" (*addr)
f7590e
+        : "r" (new_val.AO_val1), "r" (new_val.AO_val2));
f7590e
+    } while (AO_EXPECT_FALSE(result));
f7590e
+    return !result;
f7590e
+  }
f7590e
+# define AO_HAVE_double_compare_and_swap_acquire
f7590e
+
f7590e
+  AO_INLINE int
f7590e
+  AO_double_compare_and_swap_release(volatile AO_double_t *addr,
f7590e
+                                     AO_double_t old_val, AO_double_t new_val)
f7590e
+  {
f7590e
+    AO_double_t tmp;
f7590e
+    int result = 1;
f7590e
+
f7590e
+    do {
f7590e
+      __asm__ __volatile__("//AO_double_compare_and_swap_release\n"
f7590e
+        "       ldxp  %0, %1, %2\n"
f7590e
+        : "=&r" (tmp.AO_val1), "=&r" (tmp.AO_val2)
f7590e
+        : "Q" (*addr));
f7590e
+      if (tmp.AO_val1 != old_val.AO_val1 || tmp.AO_val2 != old_val.AO_val2)
f7590e
+        break;
f7590e
+      __asm__ __volatile__(
f7590e
+        "       stlxp %w0, %2, %3, %1\n"
f7590e
+        : "=&r" (result), "=Q" (*addr)
f7590e
+        : "r" (new_val.AO_val1), "r" (new_val.AO_val2));
f7590e
+    } while (AO_EXPECT_FALSE(result));
f7590e
+    return !result;
f7590e
+  }
f7590e
+# define AO_HAVE_double_compare_and_swap_release
f7590e
+#endif
f7590e
+
f7590e
+AO_INLINE void
f7590e
+AO_nop_full(void)
f7590e
+{
f7590e
+# ifndef AO_UNIPROCESSOR
f7590e
+__sync_synchronize ();
f7590e
+# endif
f7590e
+}
f7590e
+#define AO_HAVE_nop_full
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_load(const volatile AO_t *addr)
f7590e
+{
f7590e
+  return  (AO_t)__atomic_load_n (addr, __ATOMIC_RELAXED);
f7590e
+}
f7590e
+#define AO_HAVE_load
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_load_acquire(const volatile AO_t *addr)
f7590e
+{
f7590e
+  return (AO_t)__atomic_load_n (addr, __ATOMIC_ACQUIRE);
f7590e
+}
f7590e
+#define AO_HAVE_load_acquire
f7590e
+
f7590e
+AO_INLINE void
f7590e
+ AO_store(volatile AO_t *addr, AO_t value)
f7590e
+{
f7590e
+  __atomic_store_n(addr, value, __ATOMIC_RELAXED);
f7590e
+}
f7590e
+#define AO_HAVE_store
f7590e
+
f7590e
+AO_INLINE void
f7590e
+ AO_store_release(volatile AO_t *addr, AO_t value)
f7590e
+{
f7590e
+  __atomic_store_n(addr, value, __ATOMIC_RELEASE);
f7590e
+}
f7590e
+#define AO_HAVE_store_release
f7590e
+
f7590e
+AO_INLINE AO_TS_VAL_t
f7590e
+AO_test_and_set(volatile AO_TS_t *addr)
f7590e
+{
f7590e
+  return (AO_TS_VAL_t)__atomic_test_and_set(addr, __ATOMIC_RELAXED);
f7590e
+}
f7590e
+# define AO_HAVE_test_and_set
f7590e
+
f7590e
+AO_INLINE AO_TS_VAL_t
f7590e
+AO_test_and_set_acquire(volatile AO_TS_t *addr)
f7590e
+{
f7590e
+    return (AO_TS_VAL_t)__atomic_test_and_set(addr, __ATOMIC_ACQUIRE);
f7590e
+}
f7590e
+# define AO_HAVE_test_and_set_acquire
f7590e
+
f7590e
+AO_INLINE AO_TS_VAL_t
f7590e
+AO_test_and_set_release(volatile AO_TS_t *addr)
f7590e
+{
f7590e
+    return (AO_TS_VAL_t)__atomic_test_and_set(addr, __ATOMIC_RELEASE);
f7590e
+}
f7590e
+# define AO_HAVE_test_and_set_release
f7590e
+
f7590e
+AO_INLINE AO_TS_VAL_t
f7590e
+AO_test_and_set_full(volatile AO_TS_t *addr)
f7590e
+{
f7590e
+    return (AO_TS_VAL_t)__atomic_test_and_set(addr, __ATOMIC_SEQ_CST);
f7590e
+}
f7590e
+# define AO_HAVE_test_and_set_full
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_add(volatile AO_t *p, AO_t incr)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_add(p, incr, __ATOMIC_RELAXED);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_add
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_add_acquire(volatile AO_t *p, AO_t incr)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_add(p, incr, __ATOMIC_ACQUIRE);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_add_acquire
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_add_release(volatile AO_t *p, AO_t incr)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_add(p, incr, __ATOMIC_RELEASE);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_add_release
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_add_full(volatile AO_t *p, AO_t incr)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_add(p, incr, __ATOMIC_SEQ_CST);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_add_full
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_add1(volatile AO_t *p)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_add(p, 1, __ATOMIC_RELAXED);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_add1
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_add1_acquire(volatile AO_t *p)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_add(p, 1, __ATOMIC_ACQUIRE);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_add1_acquire
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_add1_release(volatile AO_t *p)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_add(p, 1, __ATOMIC_RELEASE);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_add1_release
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_add1_full(volatile AO_t *p)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_add(p, 1, __ATOMIC_SEQ_CST);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_add1_full
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_sub1(volatile AO_t *p)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_sub(p, 1, __ATOMIC_RELAXED);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_sub1
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_sub1_acquire(volatile AO_t *p)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_sub(p, 1, __ATOMIC_ACQUIRE);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_sub1_acquire
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_sub1_release(volatile AO_t *p)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_sub(p, 1, __ATOMIC_RELEASE);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_sub1_release
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_and_sub1_full(volatile AO_t *p)
f7590e
+{
f7590e
+  return (AO_t)__atomic_fetch_sub(p, 1, __ATOMIC_SEQ_CST);
f7590e
+}
f7590e
+#define AO_HAVE_fetch_and_sub1_full
f7590e
+
f7590e
+/* Returns nonzero if the comparison succeeded.  */
f7590e
+AO_INLINE int
f7590e
+AO_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
f7590e
+{
f7590e
+  return (int)__sync_bool_compare_and_swap(addr, old_val, new_val);
f7590e
+}
f7590e
+# define AO_HAVE_compare_and_swap
f7590e
+
f7590e
+AO_INLINE AO_t
f7590e
+AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
f7590e
+{
f7590e
+    return (AO_t)__sync_val_compare_and_swap(addr, old_val, new_val);
f7590e
+}
f7590e
+# define AO_HAVE_fetch_compare_and_swap
f7590e
+
f7590e
+
f7590e
+
f7590e
+#include "../../generalize.h"
f7590e
diff --git a/libatomic_ops/src/atomic_ops/sysdeps/standard_ao_double_t.h b/libatomic_ops/src/atomic_ops/sysdeps/standard_ao_double_t.h
f7590e
index 7089f05..de726fc 100644
f7590e
--- a/libatomic_ops/src/atomic_ops/sysdeps/standard_ao_double_t.h
f7590e
+++ b/libatomic_ops/src/atomic_ops/sysdeps/standard_ao_double_t.h
f7590e
@@ -11,6 +11,8 @@
f7590e
   typedef __m128 double_ptr_storage;
f7590e
 #elif defined(_WIN32) && !defined(__GNUC__)
f7590e
   typedef unsigned __int64 double_ptr_storage;
f7590e
+#elif defined(__aarch64__)
f7590e
+  typedef unsigned __int128 double_ptr_storage;
f7590e
 #else
f7590e
   typedef unsigned long long double_ptr_storage;
f7590e
 #endif