Blame SOURCES/libffi-3.0.13-ppc64le-0.patch

c52aaa
diff -urp libffi-3.0.13/src/powerpc/ffi.c libffi-current/src/powerpc/ffi.c
c52aaa
--- libffi-3.0.13/src/powerpc/ffi.c	2013-03-16 22:46:20.000000000 +1030
c52aaa
+++ libffi-current/src/powerpc/ffi.c	2013-11-18 00:48:55.218044221 +1030
c52aaa
@@ -48,12 +48,8 @@ enum {
c52aaa
 
c52aaa
   FLAG_RETURNS_128BITS  = 1 << (31-27), /* cr6  */
c52aaa
 
c52aaa
-  FLAG_SYSV_SMST_R4     = 1 << (31-26), /* use r4 for FFI_SYSV 8 byte
c52aaa
-					   structs.  */
c52aaa
-  FLAG_SYSV_SMST_R3     = 1 << (31-25), /* use r3 for FFI_SYSV 4 byte
c52aaa
-					   structs.  */
c52aaa
-
c52aaa
   FLAG_ARG_NEEDS_COPY   = 1 << (31- 7),
c52aaa
+  FLAG_ARG_NEEDS_PSAVE  = FLAG_ARG_NEEDS_COPY, /* Used by ELFv2 */
c52aaa
 #ifndef __NO_FPRS__
c52aaa
   FLAG_FP_ARGUMENTS     = 1 << (31- 6), /* cr1.eq; specified by ABI */
c52aaa
 #endif
c52aaa
@@ -132,6 +128,9 @@ ffi_prep_args_SYSV (extended_cif *ecif,
c52aaa
 
c52aaa
   int i;
c52aaa
   ffi_type **ptr;
c52aaa
+#ifndef __NO_FPRS__
c52aaa
+  double double_tmp;
c52aaa
+#endif
c52aaa
   union {
c52aaa
     void **v;
c52aaa
     char **c;
c52aaa
@@ -151,7 +150,6 @@ ffi_prep_args_SYSV (extended_cif *ecif,
c52aaa
   gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS;
c52aaa
   intarg_count = 0;
c52aaa
 #ifndef __NO_FPRS__
c52aaa
-  double double_tmp;
c52aaa
   fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS;
c52aaa
   fparg_count = 0;
c52aaa
   copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c);
c52aaa
@@ -374,7 +372,7 @@ ffi_prep_args_SYSV (extended_cif *ecif,
c52aaa
   FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS);
c52aaa
   /* The assert below is testing that the number of integer arguments agrees
c52aaa
      with the number found in ffi_prep_cif_machdep().  However, intarg_count
c52aaa
-     is incremeneted whenever we place an FP arg on the stack, so account for
c52aaa
+     is incremented whenever we place an FP arg on the stack, so account for
c52aaa
      that before our assert test.  */
c52aaa
 #ifndef __NO_FPRS__
c52aaa
   if (fparg_count > NUM_FPR_ARG_REGISTERS)
c52aaa
@@ -392,6 +390,45 @@ enum {
c52aaa
 };
c52aaa
 enum { ASM_NEEDS_REGISTERS64 = 4 };
c52aaa
 
c52aaa
+#if _CALL_ELF == 2
c52aaa
+static unsigned int
c52aaa
+discover_homogeneous_aggregate (const ffi_type *t, unsigned int *elnum)
c52aaa
+{
c52aaa
+  switch (t->type)
c52aaa
+    {
c52aaa
+    case FFI_TYPE_FLOAT:
c52aaa
+    case FFI_TYPE_DOUBLE:
c52aaa
+      *elnum = 1;
c52aaa
+      return (int) t->type;
c52aaa
+
c52aaa
+    case FFI_TYPE_STRUCT:;
c52aaa
+      {
c52aaa
+	unsigned int base_elt = 0, total_elnum = 0;
c52aaa
+	ffi_type **el = t->elements;
c52aaa
+	while (*el)
c52aaa
+	  {
c52aaa
+	    unsigned int el_elt, el_elnum = 0;
c52aaa
+	    el_elt = discover_homogeneous_aggregate (*el, &el_elnum);
c52aaa
+	    if (el_elt == 0
c52aaa
+		|| (base_elt && base_elt != el_elt))
c52aaa
+	      return 0;
c52aaa
+	    base_elt = el_elt;
c52aaa
+	    total_elnum += el_elnum;
c52aaa
+	    if (total_elnum > 8)
c52aaa
+	      return 0;
c52aaa
+	    el++;
c52aaa
+	  }
c52aaa
+	*elnum = total_elnum;
c52aaa
+	return base_elt;
c52aaa
+      }
c52aaa
+
c52aaa
+    default:
c52aaa
+      return 0;
c52aaa
+    }
c52aaa
+}
c52aaa
+#endif
c52aaa
+
c52aaa
+
c52aaa
 /* ffi_prep_args64 is called by the assembly routine once stack space
c52aaa
    has been allocated for the function's arguments.
c52aaa
 
c52aaa
@@ -437,6 +474,7 @@ ffi_prep_args64 (extended_cif *ecif, uns
c52aaa
     unsigned long *ul;
c52aaa
     float *f;
c52aaa
     double *d;
c52aaa
+    size_t p;
c52aaa
   } valp;
c52aaa
 
c52aaa
   /* 'stacktop' points at the previous backchain pointer.  */
c52aaa
@@ -452,9 +490,9 @@ ffi_prep_args64 (extended_cif *ecif, uns
c52aaa
   /* 'fpr_base' points at the space for fpr3, and grows upwards as
c52aaa
      we use FPR registers.  */
c52aaa
   valp fpr_base;
c52aaa
-  int fparg_count;
c52aaa
+  unsigned int fparg_count;
c52aaa
 
c52aaa
-  int i, words;
c52aaa
+  unsigned int i, words, nargs, nfixedargs;
c52aaa
   ffi_type **ptr;
c52aaa
   double double_tmp;
c52aaa
   union {
c52aaa
@@ -471,11 +509,18 @@ ffi_prep_args64 (extended_cif *ecif, uns
c52aaa
     double **d;
c52aaa
   } p_argv;
c52aaa
   unsigned long gprvalue;
c52aaa
+#ifdef __STRUCT_PARM_ALIGN__
c52aaa
+  unsigned long align;
c52aaa
+#endif
c52aaa
 
c52aaa
   stacktop.c = (char *) stack + bytes;
c52aaa
   gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
c52aaa
   gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
c52aaa
+#if _CALL_ELF == 2
c52aaa
+  rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64;
c52aaa
+#else
c52aaa
   rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
c52aaa
+#endif
c52aaa
   fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
c52aaa
   fparg_count = 0;
c52aaa
   next_arg.ul = gpr_base.ul;
c52aaa
@@ -491,30 +536,36 @@ ffi_prep_args64 (extended_cif *ecif, uns
c52aaa
 
c52aaa
   /* Now for the arguments.  */
c52aaa
   p_argv.v = ecif->avalue;
c52aaa
-  for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
c52aaa
-       i > 0;
c52aaa
-       i--, ptr++, p_argv.v++)
c52aaa
+  nargs = ecif->cif->nargs;
c52aaa
+  nfixedargs = ecif->cif->nfixedargs;
c52aaa
+  for (ptr = ecif->cif->arg_types, i = 0;
c52aaa
+       i < nargs;
c52aaa
+       i++, ptr++, p_argv.v++)
c52aaa
     {
c52aaa
+      unsigned int elt, elnum;
c52aaa
+
c52aaa
       switch ((*ptr)->type)
c52aaa
 	{
c52aaa
 	case FFI_TYPE_FLOAT:
c52aaa
 	  double_tmp = **p_argv.f;
c52aaa
-	  *next_arg.f = (float) double_tmp;
c52aaa
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
c52aaa
+	    *fpr_base.d++ = double_tmp;
c52aaa
+	  else
c52aaa
+	    *next_arg.f = (float) double_tmp;
c52aaa
 	  if (++next_arg.ul == gpr_end.ul)
c52aaa
 	    next_arg.ul = rest.ul;
c52aaa
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS64)
c52aaa
-	    *fpr_base.d++ = double_tmp;
c52aaa
 	  fparg_count++;
c52aaa
 	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
c52aaa
 	  break;
c52aaa
 
c52aaa
 	case FFI_TYPE_DOUBLE:
c52aaa
 	  double_tmp = **p_argv.d;
c52aaa
-	  *next_arg.d = double_tmp;
c52aaa
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
c52aaa
+	    *fpr_base.d++ = double_tmp;
c52aaa
+	  else
c52aaa
+	    *next_arg.d = double_tmp;
c52aaa
 	  if (++next_arg.ul == gpr_end.ul)
c52aaa
 	    next_arg.ul = rest.ul;
c52aaa
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS64)
c52aaa
-	    *fpr_base.d++ = double_tmp;
c52aaa
 	  fparg_count++;
c52aaa
 	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
c52aaa
 	  break;
c52aaa
@@ -522,18 +573,20 @@ ffi_prep_args64 (extended_cif *ecif, uns
c52aaa
 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
c52aaa
 	case FFI_TYPE_LONGDOUBLE:
c52aaa
 	  double_tmp = (*p_argv.d)[0];
c52aaa
-	  *next_arg.d = double_tmp;
c52aaa
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
c52aaa
+	    *fpr_base.d++ = double_tmp;
c52aaa
+	  else
c52aaa
+	    *next_arg.d = double_tmp;
c52aaa
 	  if (++next_arg.ul == gpr_end.ul)
c52aaa
 	    next_arg.ul = rest.ul;
c52aaa
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS64)
c52aaa
-	    *fpr_base.d++ = double_tmp;
c52aaa
 	  fparg_count++;
c52aaa
 	  double_tmp = (*p_argv.d)[1];
c52aaa
-	  *next_arg.d = double_tmp;
c52aaa
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
c52aaa
+	    *fpr_base.d++ = double_tmp;
c52aaa
+	  else
c52aaa
+	    *next_arg.d = double_tmp;
c52aaa
 	  if (++next_arg.ul == gpr_end.ul)
c52aaa
 	    next_arg.ul = rest.ul;
c52aaa
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS64)
c52aaa
-	    *fpr_base.d++ = double_tmp;
c52aaa
 	  fparg_count++;
c52aaa
 	  FFI_ASSERT (__LDBL_MANT_DIG__ == 106);
c52aaa
 	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
c52aaa
@@ -541,27 +594,86 @@ ffi_prep_args64 (extended_cif *ecif, uns
c52aaa
 #endif
c52aaa
 
c52aaa
 	case FFI_TYPE_STRUCT:
c52aaa
-	  words = ((*ptr)->size + 7) / 8;
c52aaa
-	  if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
c52aaa
-	    {
c52aaa
-	      size_t first = gpr_end.c - next_arg.c;
c52aaa
-	      memcpy (next_arg.c, *p_argv.c, first);
c52aaa
-	      memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
c52aaa
-	      next_arg.c = rest.c + words * 8 - first;
c52aaa
+#ifdef __STRUCT_PARM_ALIGN__
c52aaa
+	  align = (*ptr)->alignment;
c52aaa
+	  if (align > __STRUCT_PARM_ALIGN__)
c52aaa
+	    align = __STRUCT_PARM_ALIGN__;
c52aaa
+	  if (align > 1)
c52aaa
+	    next_arg.p = ALIGN (next_arg.p, align);
c52aaa
+#endif
c52aaa
+	  elt = 0;
c52aaa
+#if _CALL_ELF == 2
c52aaa
+	  elt = discover_homogeneous_aggregate (*ptr, &elnum);
c52aaa
+#endif
c52aaa
+	  if (elt)
c52aaa
+	    {
c52aaa
+	      union {
c52aaa
+		void *v;
c52aaa
+		float *f;
c52aaa
+		double *d;
c52aaa
+	      } arg;
c52aaa
+
c52aaa
+	      arg.v = *p_argv.v;
c52aaa
+	      if (elt == FFI_TYPE_FLOAT)
c52aaa
+		{
c52aaa
+		  do
c52aaa
+		    {
c52aaa
+		      double_tmp = *arg.f++;
c52aaa
+		      if (fparg_count < NUM_FPR_ARG_REGISTERS64
c52aaa
+			  && i < nfixedargs)
c52aaa
+			*fpr_base.d++ = double_tmp;
c52aaa
+		      else
c52aaa
+			*next_arg.f = (float) double_tmp;
c52aaa
+		      if (++next_arg.f == gpr_end.f)
c52aaa
+			next_arg.f = rest.f;
c52aaa
+		      fparg_count++;
c52aaa
+		    }
c52aaa
+		  while (--elnum != 0);
c52aaa
+		  if ((next_arg.p & 3) != 0)
c52aaa
+		    {
c52aaa
+		      if (++next_arg.f == gpr_end.f)
c52aaa
+			next_arg.f = rest.f;
c52aaa
+		    }
c52aaa
+		}
c52aaa
+	      else
c52aaa
+		do
c52aaa
+		  {
c52aaa
+		    double_tmp = *arg.d++;
c52aaa
+		    if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
c52aaa
+		      *fpr_base.d++ = double_tmp;
c52aaa
+		    else
c52aaa
+		      *next_arg.d = double_tmp;
c52aaa
+		    if (++next_arg.d == gpr_end.d)
c52aaa
+		      next_arg.d = rest.d;
c52aaa
+		    fparg_count++;
c52aaa
+		  }
c52aaa
+		while (--elnum != 0);
c52aaa
 	    }
c52aaa
 	  else
c52aaa
 	    {
c52aaa
-	      char *where = next_arg.c;
c52aaa
-
c52aaa
-	      /* Structures with size less than eight bytes are passed
c52aaa
-		 left-padded.  */
c52aaa
-	      if ((*ptr)->size < 8)
c52aaa
-		where += 8 - (*ptr)->size;
c52aaa
+	      words = ((*ptr)->size + 7) / 8;
c52aaa
+	      if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
c52aaa
+		{
c52aaa
+		  size_t first = gpr_end.c - next_arg.c;
c52aaa
+		  memcpy (next_arg.c, *p_argv.c, first);
c52aaa
+		  memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
c52aaa
+		  next_arg.c = rest.c + words * 8 - first;
c52aaa
+		}
c52aaa
+	      else
c52aaa
+		{
c52aaa
+		  char *where = next_arg.c;
c52aaa
 
c52aaa
-	      memcpy (where, *p_argv.c, (*ptr)->size);
c52aaa
-	      next_arg.ul += words;
c52aaa
-	      if (next_arg.ul == gpr_end.ul)
c52aaa
-		next_arg.ul = rest.ul;
c52aaa
+#ifndef __LITTLE_ENDIAN__
c52aaa
+		  /* Structures with size less than eight bytes are passed
c52aaa
+		     left-padded.  */
c52aaa
+		  if ((*ptr)->size < 8)
c52aaa
+		    where += 8 - (*ptr)->size;
c52aaa
+#endif
c52aaa
+		  memcpy (where, *p_argv.c, (*ptr)->size);
c52aaa
+		  next_arg.ul += words;
c52aaa
+		  if (next_arg.ul == gpr_end.ul)
c52aaa
+		    next_arg.ul = rest.ul;
c52aaa
+		}
c52aaa
 	    }
c52aaa
 	  break;
c52aaa
 
c52aaa
@@ -605,24 +717,22 @@ ffi_prep_args64 (extended_cif *ecif, uns
c52aaa
 
c52aaa
 
c52aaa
 /* Perform machine dependent cif processing */
c52aaa
-ffi_status
c52aaa
-ffi_prep_cif_machdep (ffi_cif *cif)
c52aaa
+static ffi_status
c52aaa
+ffi_prep_cif_machdep_core (ffi_cif *cif)
c52aaa
 {
c52aaa
   /* All this is for the SYSV and LINUX64 ABI.  */
c52aaa
-  int i;
c52aaa
   ffi_type **ptr;
c52aaa
   unsigned bytes;
c52aaa
-  int fparg_count = 0, intarg_count = 0;
c52aaa
-  unsigned flags = 0;
c52aaa
+  unsigned i, fparg_count = 0, intarg_count = 0;
c52aaa
+  unsigned flags = cif->flags;
c52aaa
   unsigned struct_copy_size = 0;
c52aaa
   unsigned type = cif->rtype->type;
c52aaa
   unsigned size = cif->rtype->size;
c52aaa
 
c52aaa
+  /* The machine-independent calculation of cif->bytes doesn't work
c52aaa
+     for us.  Redo the calculation.  */
c52aaa
   if (cif->abi != FFI_LINUX64)
c52aaa
     {
c52aaa
-      /* All the machine-independent calculation of cif->bytes will be wrong.
c52aaa
-	 Redo the calculation for SYSV.  */
c52aaa
-
c52aaa
       /* Space for the frame pointer, callee's LR, and the asm's temp regs.  */
c52aaa
       bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int);
c52aaa
 
c52aaa
@@ -632,13 +742,20 @@ ffi_prep_cif_machdep (ffi_cif *cif)
c52aaa
   else
c52aaa
     {
c52aaa
       /* 64-bit ABI.  */
c52aaa
+#if _CALL_ELF == 2
c52aaa
+      /* Space for backchain, CR, LR, TOC and the asm's temp regs.  */
c52aaa
+      bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long);
c52aaa
 
c52aaa
+      /* Space for the general registers.  */
c52aaa
+      bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long);
c52aaa
+#else
c52aaa
       /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
c52aaa
 	 regs.  */
c52aaa
       bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);
c52aaa
 
c52aaa
       /* Space for the mandatory parm save area and general registers.  */
c52aaa
       bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
c52aaa
+#endif
c52aaa
     }
c52aaa
 
c52aaa
   /* Return value handling.  The rules for SYSV are as follows:
c52aaa
@@ -658,19 +775,23 @@ ffi_prep_cif_machdep (ffi_cif *cif)
c52aaa
      - soft-float float/doubles are treated as UINT32/UINT64 respectivley.
c52aaa
      - soft-float long doubles are returned in gpr3-gpr6.  */
c52aaa
   /* First translate for softfloat/nonlinux */
c52aaa
-  if (cif->abi == FFI_LINUX_SOFT_FLOAT) {
c52aaa
-	if (type == FFI_TYPE_FLOAT)
c52aaa
-		type = FFI_TYPE_UINT32;
c52aaa
-	if (type == FFI_TYPE_DOUBLE)
c52aaa
-		type = FFI_TYPE_UINT64;
c52aaa
-	if (type == FFI_TYPE_LONGDOUBLE)
c52aaa
-		type = FFI_TYPE_UINT128;
c52aaa
-  } else if (cif->abi != FFI_LINUX && cif->abi != FFI_LINUX64) {
c52aaa
+  if (cif->abi == FFI_LINUX_SOFT_FLOAT)
c52aaa
+    {
c52aaa
+      if (type == FFI_TYPE_FLOAT)
c52aaa
+	type = FFI_TYPE_UINT32;
c52aaa
+      if (type == FFI_TYPE_DOUBLE)
c52aaa
+	type = FFI_TYPE_UINT64;
c52aaa
+      if (type == FFI_TYPE_LONGDOUBLE)
c52aaa
+	type = FFI_TYPE_UINT128;
c52aaa
+    }
c52aaa
+  else if (cif->abi != FFI_LINUX
c52aaa
+	   && cif->abi != FFI_LINUX64)
c52aaa
+    {
c52aaa
 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
c52aaa
-	if (type == FFI_TYPE_LONGDOUBLE)
c52aaa
-		type = FFI_TYPE_STRUCT;
c52aaa
+      if (type == FFI_TYPE_LONGDOUBLE)
c52aaa
+	type = FFI_TYPE_STRUCT;
c52aaa
 #endif
c52aaa
-  }
c52aaa
+    }
c52aaa
 
c52aaa
   switch (type)
c52aaa
     {
c52aaa
@@ -697,35 +818,40 @@ ffi_prep_cif_machdep (ffi_cif *cif)
c52aaa
       break;
c52aaa
 
c52aaa
     case FFI_TYPE_STRUCT:
c52aaa
-      if (cif->abi == FFI_SYSV)
c52aaa
+      /*
c52aaa
+       * The final SYSV ABI says that structures smaller than or equal to 8 bytes
c52aaa
+       * are returned in r3/r4.  The FFI_GCC_SYSV ABI instead returns them
c52aaa
+       * in memory.
c52aaa
+       *
c52aaa
+       * NOTE: The assembly code can safely assume that it just needs to
c52aaa
+       *       store both r3 and r4 into a 8-byte word-aligned buffer, as
c52aaa
+       *       we allocate a temporary buffer in ffi_call() if this flag is
c52aaa
+       *       set.
c52aaa
+       */
c52aaa
+      if (cif->abi == FFI_SYSV && size <= 8)
c52aaa
 	{
c52aaa
-	  /* The final SYSV ABI says that structures smaller or equal 8 bytes
c52aaa
-	     are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
c52aaa
-	     in memory.  */
c52aaa
-
c52aaa
-	  /* Treat structs with size <= 8 bytes.  */
c52aaa
-	  if (size <= 8)
c52aaa
+	  flags |= FLAG_RETURNS_SMST;
c52aaa
+	  break;
c52aaa
+	}
c52aaa
+#if _CALL_ELF == 2
c52aaa
+      if (cif->abi == FFI_LINUX64)
c52aaa
+	{
c52aaa
+	  unsigned int elt, elnum;
c52aaa
+	  elt = discover_homogeneous_aggregate (cif->rtype, &elnum);
c52aaa
+	  if (elt)
c52aaa
+	    {
c52aaa
+	      if (elt == FFI_TYPE_DOUBLE)
c52aaa
+		flags |= FLAG_RETURNS_64BITS;
c52aaa
+	      flags |= FLAG_RETURNS_FP | FLAG_RETURNS_SMST;
c52aaa
+	      break;
c52aaa
+	    }
c52aaa
+	  if (size <= 16)
c52aaa
 	    {
c52aaa
 	      flags |= FLAG_RETURNS_SMST;
c52aaa
-	      /* These structs are returned in r3. We pack the type and the
c52aaa
-		 precalculated shift value (needed in the sysv.S) into flags.
c52aaa
-		 The same applies for the structs returned in r3/r4.  */
c52aaa
-	      if (size <= 4)
c52aaa
-		{
c52aaa
-		  flags |= FLAG_SYSV_SMST_R3;
c52aaa
-		  flags |= 8 * (4 - size) << 8;
c52aaa
-		  break;
c52aaa
-		}
c52aaa
-	      /* These structs are returned in r3 and r4. See above.   */
c52aaa
-	      if  (size <= 8)
c52aaa
-		{
c52aaa
-		  flags |= FLAG_SYSV_SMST_R3 | FLAG_SYSV_SMST_R4;
c52aaa
-		  flags |= 8 * (8 - size) << 8;
c52aaa
-		  break;
c52aaa
-		}
c52aaa
+	      break;
c52aaa
 	    }
c52aaa
 	}
c52aaa
-
c52aaa
+#endif
c52aaa
       intarg_count++;
c52aaa
       flags |= FLAG_RETVAL_REFERENCE;
c52aaa
       /* Fall through.  */
c52aaa
@@ -841,27 +967,54 @@ ffi_prep_cif_machdep (ffi_cif *cif)
c52aaa
   else
c52aaa
     for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
c52aaa
       {
c52aaa
+	unsigned int elt, elnum;
c52aaa
+#ifdef __STRUCT_PARM_ALIGN__
c52aaa
+	unsigned int align;
c52aaa
+#endif
c52aaa
+
c52aaa
 	switch ((*ptr)->type)
c52aaa
 	  {
c52aaa
 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
c52aaa
 	  case FFI_TYPE_LONGDOUBLE:
c52aaa
-	    if (cif->abi == FFI_LINUX_SOFT_FLOAT)
c52aaa
-	      intarg_count += 4;
c52aaa
-	    else
c52aaa
-	      {
c52aaa
-		fparg_count += 2;
c52aaa
-		intarg_count += 2;
c52aaa
-	      }
c52aaa
+	    fparg_count += 2;
c52aaa
+	    intarg_count += 2;
c52aaa
+	    if (fparg_count > NUM_FPR_ARG_REGISTERS)
c52aaa
+	      flags |= FLAG_ARG_NEEDS_PSAVE;
c52aaa
 	    break;
c52aaa
 #endif
c52aaa
 	  case FFI_TYPE_FLOAT:
c52aaa
 	  case FFI_TYPE_DOUBLE:
c52aaa
 	    fparg_count++;
c52aaa
 	    intarg_count++;
c52aaa
+	    if (fparg_count > NUM_FPR_ARG_REGISTERS)
c52aaa
+	      flags |= FLAG_ARG_NEEDS_PSAVE;
c52aaa
 	    break;
c52aaa
 
c52aaa
 	  case FFI_TYPE_STRUCT:
c52aaa
+#ifdef __STRUCT_PARM_ALIGN__
c52aaa
+	    align = (*ptr)->alignment;
c52aaa
+	    if (align > __STRUCT_PARM_ALIGN__)
c52aaa
+	      align = __STRUCT_PARM_ALIGN__;
c52aaa
+	    align = align / 8;
c52aaa
+	    if (align > 1)
c52aaa
+	      intarg_count = ALIGN (intarg_count, align);
c52aaa
+#endif
c52aaa
 	    intarg_count += ((*ptr)->size + 7) / 8;
c52aaa
+	    elt = 0;
c52aaa
+#if _CALL_ELF == 2
c52aaa
+	    elt = discover_homogeneous_aggregate (*ptr, &elnum);
c52aaa
+#endif
c52aaa
+	    if (elt)
c52aaa
+	      {
c52aaa
+		fparg_count += elnum;
c52aaa
+		if (fparg_count > NUM_FPR_ARG_REGISTERS)
c52aaa
+		  flags |= FLAG_ARG_NEEDS_PSAVE;
c52aaa
+	      }
c52aaa
+	    else
c52aaa
+	      {
c52aaa
+		if (intarg_count > NUM_GPR_ARG_REGISTERS)
c52aaa
+		  flags |= FLAG_ARG_NEEDS_PSAVE;
c52aaa
+	      }
c52aaa
 	    break;
c52aaa
 
c52aaa
 	  case FFI_TYPE_POINTER:
c52aaa
@@ -877,9 +1030,11 @@ ffi_prep_cif_machdep (ffi_cif *cif)
c52aaa
 	    /* Everything else is passed as a 8-byte word in a GPR, either
c52aaa
 	       the object itself or a pointer to it.  */
c52aaa
 	    intarg_count++;
c52aaa
+	    if (intarg_count > NUM_GPR_ARG_REGISTERS)
c52aaa
+	      flags |= FLAG_ARG_NEEDS_PSAVE;
c52aaa
 	    break;
c52aaa
 	  default:
c52aaa
-		FFI_ASSERT (0);
c52aaa
+	    FFI_ASSERT (0);
c52aaa
 	  }
c52aaa
       }
c52aaa
 
c52aaa
@@ -917,8 +1072,13 @@ ffi_prep_cif_machdep (ffi_cif *cif)
c52aaa
 #endif
c52aaa
 
c52aaa
       /* Stack space.  */
c52aaa
+#if _CALL_ELF == 2
c52aaa
+      if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0)
c52aaa
+	bytes += intarg_count * sizeof (long);
c52aaa
+#else
c52aaa
       if (intarg_count > NUM_GPR_ARG_REGISTERS64)
c52aaa
 	bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
c52aaa
+#endif
c52aaa
     }
c52aaa
 
c52aaa
   /* The stack space allocated needs to be a multiple of 16 bytes.  */
c52aaa
@@ -933,6 +1093,26 @@ ffi_prep_cif_machdep (ffi_cif *cif)
c52aaa
   return FFI_OK;
c52aaa
 }
c52aaa
 
c52aaa
+ffi_status
c52aaa
+ffi_prep_cif_machdep (ffi_cif *cif)
c52aaa
+{
c52aaa
+  cif->nfixedargs = cif->nargs;
c52aaa
+  return ffi_prep_cif_machdep_core (cif);
c52aaa
+}
c52aaa
+
c52aaa
+ffi_status
c52aaa
+ffi_prep_cif_machdep_var (ffi_cif *cif,
c52aaa
+			  unsigned int nfixedargs,
c52aaa
+			  unsigned int ntotalargs MAYBE_UNUSED)
c52aaa
+{
c52aaa
+  cif->nfixedargs = nfixedargs;
c52aaa
+#if _CALL_ELF == 2
c52aaa
+  if (cif->abi == FFI_LINUX64)
c52aaa
+    cif->flags |= FLAG_ARG_NEEDS_PSAVE;
c52aaa
+#endif
c52aaa
+  return ffi_prep_cif_machdep_core (cif);
c52aaa
+}
c52aaa
+
c52aaa
 extern void ffi_call_SYSV(extended_cif *, unsigned, unsigned, unsigned *,
c52aaa
 			  void (*fn)(void));
c52aaa
 extern void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, unsigned long,
c52aaa
@@ -944,30 +1124,28 @@ ffi_call(ffi_cif *cif, void (*fn)(void),
c52aaa
 {
c52aaa
   /*
c52aaa
    * The final SYSV ABI says that structures smaller or equal 8 bytes
c52aaa
-   * are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
c52aaa
+   * are returned in r3/r4.  The FFI_GCC_SYSV ABI instead returns them
c52aaa
    * in memory.
c52aaa
    *
c52aaa
-   * Just to keep things simple for the assembly code, we will always
c52aaa
-   * bounce-buffer struct return values less than or equal to 8 bytes.
c52aaa
-   * This allows the ASM to handle SYSV small structures by directly
c52aaa
-   * writing r3 and r4 to memory without worrying about struct size.
c52aaa
+   * We bounce-buffer SYSV small struct return values so that sysv.S
c52aaa
+   * can write r3 and r4 to memory without worrying about struct size.
c52aaa
+   *
c52aaa
+   * For ELFv2 ABI, use a bounce buffer for homogeneous structs too,
c52aaa
+   * for similar reasons.
c52aaa
    */
c52aaa
-  unsigned int smst_buffer[2];
c52aaa
+  unsigned long smst_buffer[8];
c52aaa
   extended_cif ecif;
c52aaa
-  unsigned int rsize = 0;
c52aaa
 
c52aaa
   ecif.cif = cif;
c52aaa
   ecif.avalue = avalue;
c52aaa
 
c52aaa
-  /* Ensure that we have a valid struct return value */
c52aaa
   ecif.rvalue = rvalue;
c52aaa
-  if (cif->rtype->type == FFI_TYPE_STRUCT) {
c52aaa
-    rsize = cif->rtype->size;
c52aaa
-    if (rsize <= 8)
c52aaa
-      ecif.rvalue = smst_buffer;
c52aaa
-    else if (!rvalue)
c52aaa
-      ecif.rvalue = alloca(rsize);
c52aaa
-  }
c52aaa
+  if ((cif->flags & FLAG_RETURNS_SMST) != 0)
c52aaa
+    ecif.rvalue = smst_buffer;
c52aaa
+  /* Ensure that we have a valid struct return value.
c52aaa
+     FIXME: Isn't this just papering over a user problem?  */
c52aaa
+  else if (!rvalue && cif->rtype->type == FFI_TYPE_STRUCT)
c52aaa
+    ecif.rvalue = alloca (cif->rtype->size);
c52aaa
 
c52aaa
   switch (cif->abi)
c52aaa
     {
c52aaa
@@ -992,11 +1170,26 @@ ffi_call(ffi_cif *cif, void (*fn)(void),
c52aaa
 
c52aaa
   /* Check for a bounce-buffered return value */
c52aaa
   if (rvalue && ecif.rvalue == smst_buffer)
c52aaa
-    memcpy(rvalue, smst_buffer, rsize);
c52aaa
+    {
c52aaa
+      unsigned int rsize = cif->rtype->size;
c52aaa
+#ifndef __LITTLE_ENDIAN__
c52aaa
+      /* The SYSV ABI returns a structure of up to 4 bytes in size
c52aaa
+	 left-padded in r3.  */
c52aaa
+      if (cif->abi == FFI_SYSV && rsize <= 4)
c52aaa
+	memcpy (rvalue, (char *) smst_buffer + 4 - rsize, rsize);
c52aaa
+      /* The SYSV ABI returns a structure of up to 8 bytes in size
c52aaa
+	 left-padded in r3/r4, and the ELFv2 ABI similarly returns a
c52aaa
+	 structure of up to 8 bytes in size left-padded in r3.  */
c52aaa
+      else if (rsize <= 8)
c52aaa
+	memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize);
c52aaa
+      else
c52aaa
+#endif
c52aaa
+	memcpy (rvalue, smst_buffer, rsize);
c52aaa
+    }
c52aaa
 }
c52aaa
 
c52aaa
 
c52aaa
-#ifndef POWERPC64
c52aaa
+#if !defined POWERPC64 || _CALL_ELF == 2
c52aaa
 #define MIN_CACHE_LINE_SIZE 8
c52aaa
 
c52aaa
 static void
c52aaa
@@ -1020,6 +1213,22 @@ ffi_prep_closure_loc (ffi_closure *closu
c52aaa
 		      void *codeloc)
c52aaa
 {
c52aaa
 #ifdef POWERPC64
c52aaa
+# if _CALL_ELF == 2
c52aaa
+  unsigned int *tramp = (unsigned int *) &closure->tramp[0];
c52aaa
+
c52aaa
+  if (cif->abi != FFI_LINUX64)
c52aaa
+    return FFI_BAD_ABI;
c52aaa
+
c52aaa
+  tramp[0] = 0xe96c0018;	/* 0:	ld	11,2f-0b(12)	*/
c52aaa
+  tramp[1] = 0xe98c0010;	/*	ld	12,1f-0b(12)	*/
c52aaa
+  tramp[2] = 0x7d8903a6;	/*	mtctr	12		*/
c52aaa
+  tramp[3] = 0x4e800420;	/*	bctr			*/
c52aaa
+				/* 1:	.quad	function_addr	*/
c52aaa
+				/* 2:	.quad	context		*/
c52aaa
+  *(void **) &tramp[4] = (void *) ffi_closure_LINUX64;
c52aaa
+  *(void **) &tramp[6] = codeloc;
c52aaa
+  flush_icache ((char *)tramp, (char *)codeloc, FFI_TRAMPOLINE_SIZE);
c52aaa
+# else
c52aaa
   void **tramp = (void **) &closure->tramp[0];
c52aaa
 
c52aaa
   if (cif->abi != FFI_LINUX64)
c52aaa
@@ -1027,6 +1236,7 @@ ffi_prep_closure_loc (ffi_closure *closu
c52aaa
   /* Copy function address and TOC from ffi_closure_LINUX64.  */
c52aaa
   memcpy (tramp, (char *) ffi_closure_LINUX64, 16);
c52aaa
   tramp[2] = codeloc;
c52aaa
+# endif
c52aaa
 #else
c52aaa
   unsigned int *tramp;
c52aaa
 
c52aaa
@@ -1236,6 +1446,7 @@ ffi_closure_helper_SYSV (ffi_closure *cl
c52aaa
 
c52aaa
 	case FFI_TYPE_SINT8:
c52aaa
 	case FFI_TYPE_UINT8:
c52aaa
+#ifndef __LITTLE_ENDIAN__
c52aaa
 	  /* there are 8 gpr registers used to pass values */
c52aaa
 	  if (ng < 8)
c52aaa
 	    {
c52aaa
@@ -1249,9 +1460,11 @@ ffi_closure_helper_SYSV (ffi_closure *cl
c52aaa
 	      pst++;
c52aaa
 	    }
c52aaa
 	  break;
c52aaa
+#endif
c52aaa
 
c52aaa
 	case FFI_TYPE_SINT16:
c52aaa
 	case FFI_TYPE_UINT16:
c52aaa
+#ifndef __LITTLE_ENDIAN__
c52aaa
 	  /* there are 8 gpr registers used to pass values */
c52aaa
 	  if (ng < 8)
c52aaa
 	    {
c52aaa
@@ -1265,6 +1478,7 @@ ffi_closure_helper_SYSV (ffi_closure *cl
c52aaa
 	      pst++;
c52aaa
 	    }
c52aaa
 	  break;
c52aaa
+#endif
c52aaa
 
c52aaa
 	case FFI_TYPE_SINT32:
c52aaa
 	case FFI_TYPE_UINT32:
c52aaa
@@ -1369,16 +1583,20 @@ ffi_closure_helper_LINUX64 (ffi_closure
c52aaa
 
c52aaa
   void **avalue;
c52aaa
   ffi_type **arg_types;
c52aaa
-  long i, avn;
c52aaa
+  unsigned long i, avn, nfixedargs;
c52aaa
   ffi_cif *cif;
c52aaa
   ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
c52aaa
+#ifdef __STRUCT_PARM_ALIGN__
c52aaa
+  unsigned long align;
c52aaa
+#endif
c52aaa
 
c52aaa
   cif = closure->cif;
c52aaa
   avalue = alloca (cif->nargs * sizeof (void *));
c52aaa
 
c52aaa
-  /* Copy the caller's structure return value address so that the closure
c52aaa
-     returns the data directly to the caller.  */
c52aaa
-  if (cif->rtype->type == FFI_TYPE_STRUCT)
c52aaa
+  /* Copy the caller's structure return value address so that the
c52aaa
+     closure returns the data directly to the caller.  */
c52aaa
+  if (cif->rtype->type == FFI_TYPE_STRUCT
c52aaa
+      && (cif->flags & FLAG_RETURNS_SMST) == 0)
c52aaa
     {
c52aaa
       rvalue = (void *) *pst;
c52aaa
       pst++;
c52aaa
@@ -1386,30 +1604,39 @@ ffi_closure_helper_LINUX64 (ffi_closure
c52aaa
 
c52aaa
   i = 0;
c52aaa
   avn = cif->nargs;
c52aaa
+  nfixedargs = cif->nfixedargs;
c52aaa
   arg_types = cif->arg_types;
c52aaa
 
c52aaa
   /* Grab the addresses of the arguments from the stack frame.  */
c52aaa
   while (i < avn)
c52aaa
     {
c52aaa
+      unsigned int elt, elnum;
c52aaa
+
c52aaa
       switch (arg_types[i]->type)
c52aaa
 	{
c52aaa
 	case FFI_TYPE_SINT8:
c52aaa
 	case FFI_TYPE_UINT8:
c52aaa
+#ifndef __LITTLE_ENDIAN__
c52aaa
 	  avalue[i] = (char *) pst + 7;
c52aaa
 	  pst++;
c52aaa
 	  break;
c52aaa
+#endif
c52aaa
 
c52aaa
 	case FFI_TYPE_SINT16:
c52aaa
 	case FFI_TYPE_UINT16:
c52aaa
+#ifndef __LITTLE_ENDIAN__
c52aaa
 	  avalue[i] = (char *) pst + 6;
c52aaa
 	  pst++;
c52aaa
 	  break;
c52aaa
+#endif
c52aaa
 
c52aaa
 	case FFI_TYPE_SINT32:
c52aaa
 	case FFI_TYPE_UINT32:
c52aaa
+#ifndef __LITTLE_ENDIAN__
c52aaa
 	  avalue[i] = (char *) pst + 4;
c52aaa
 	  pst++;
c52aaa
 	  break;
c52aaa
+#endif
c52aaa
 
c52aaa
 	case FFI_TYPE_SINT64:
c52aaa
 	case FFI_TYPE_UINT64:
c52aaa
@@ -1419,12 +1646,82 @@ ffi_closure_helper_LINUX64 (ffi_closure
c52aaa
 	  break;
c52aaa
 
c52aaa
 	case FFI_TYPE_STRUCT:
c52aaa
-	  /* Structures with size less than eight bytes are passed
c52aaa
-	     left-padded.  */
c52aaa
-	  if (arg_types[i]->size < 8)
c52aaa
-	    avalue[i] = (char *) pst + 8 - arg_types[i]->size;
c52aaa
+#ifdef __STRUCT_PARM_ALIGN__
c52aaa
+	  align = arg_types[i]->alignment;
c52aaa
+	  if (align > __STRUCT_PARM_ALIGN__)
c52aaa
+	    align = __STRUCT_PARM_ALIGN__;
c52aaa
+	  if (align > 1)
c52aaa
+	    pst = (unsigned long *) ALIGN ((size_t) pst, align);
c52aaa
+#endif
c52aaa
+	  elt = 0;
c52aaa
+#if _CALL_ELF == 2
c52aaa
+	  elt = discover_homogeneous_aggregate (arg_types[i], &elnum);
c52aaa
+#endif
c52aaa
+	  if (elt)
c52aaa
+	    {
c52aaa
+	      union {
c52aaa
+		void *v;
c52aaa
+		unsigned long *ul;
c52aaa
+		float *f;
c52aaa
+		double *d;
c52aaa
+		size_t p;
c52aaa
+	      } to, from;
c52aaa
+
c52aaa
+	      /* Repackage the aggregate from its parts.  The
c52aaa
+		 aggregate size is not greater than the space taken by
c52aaa
+		 the registers so store back to the register/parameter
c52aaa
+		 save arrays.  */
c52aaa
+	      if (pfr + elnum <= end_pfr)
c52aaa
+		to.v = pfr;
c52aaa
+	      else
c52aaa
+		to.v = pst;
c52aaa
+
c52aaa
+	      avalue[i] = to.v;
c52aaa
+	      from.ul = pst;
c52aaa
+	      if (elt == FFI_TYPE_FLOAT)
c52aaa
+		{
c52aaa
+		  do
c52aaa
+		    {
c52aaa
+		      if (pfr < end_pfr && i < nfixedargs)
c52aaa
+			{
c52aaa
+			  *to.f = (float) pfr->d;
c52aaa
+			  pfr++;
c52aaa
+			}
c52aaa
+		      else
c52aaa
+			*to.f = *from.f;
c52aaa
+		      to.f++;
c52aaa
+		      from.f++;
c52aaa
+		    }
c52aaa
+		  while (--elnum != 0);
c52aaa
+		}
c52aaa
+	      else
c52aaa
+		{
c52aaa
+		  do
c52aaa
+		    {
c52aaa
+		      if (pfr < end_pfr && i < nfixedargs)
c52aaa
+			{
c52aaa
+			  *to.d = pfr->d;
c52aaa
+			  pfr++;
c52aaa
+			}
c52aaa
+		      else
c52aaa
+			*to.d = *from.d;
c52aaa
+		      to.d++;
c52aaa
+		      from.d++;
c52aaa
+		    }
c52aaa
+		  while (--elnum != 0);
c52aaa
+		}
c52aaa
+	    }
c52aaa
 	  else
c52aaa
-	    avalue[i] = pst;
c52aaa
+	    {
c52aaa
+#ifndef __LITTLE_ENDIAN__
c52aaa
+	      /* Structures with size less than eight bytes are passed
c52aaa
+		 left-padded.  */
c52aaa
+	      if (arg_types[i]->size < 8)
c52aaa
+		avalue[i] = (char *) pst + 8 - arg_types[i]->size;
c52aaa
+	      else
c52aaa
+#endif
c52aaa
+		avalue[i] = pst;
c52aaa
+	    }
c52aaa
 	  pst += (arg_types[i]->size + 7) / 8;
c52aaa
 	  break;
c52aaa
 
c52aaa
@@ -1436,7 +1733,7 @@ ffi_closure_helper_LINUX64 (ffi_closure
c52aaa
 
c52aaa
 	  /* there are 13 64bit floating point registers */
c52aaa
 
c52aaa
-	  if (pfr < end_pfr)
c52aaa
+	  if (pfr < end_pfr && i < nfixedargs)
c52aaa
 	    {
c52aaa
 	      double temp = pfr->d;
c52aaa
 	      pfr->f = (float) temp;
c52aaa
@@ -1452,7 +1749,7 @@ ffi_closure_helper_LINUX64 (ffi_closure
c52aaa
 	  /* On the outgoing stack all values are aligned to 8 */
c52aaa
 	  /* there are 13 64bit floating point registers */
c52aaa
 
c52aaa
-	  if (pfr < end_pfr)
c52aaa
+	  if (pfr < end_pfr && i < nfixedargs)
c52aaa
 	    {
c52aaa
 	      avalue[i] = pfr;
c52aaa
 	      pfr++;
c52aaa
@@ -1464,14 +1761,14 @@ ffi_closure_helper_LINUX64 (ffi_closure
c52aaa
 
c52aaa
 #if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
c52aaa
 	case FFI_TYPE_LONGDOUBLE:
c52aaa
-	  if (pfr + 1 < end_pfr)
c52aaa
+	  if (pfr + 1 < end_pfr && i + 1 < nfixedargs)
c52aaa
 	    {
c52aaa
 	      avalue[i] = pfr;
c52aaa
 	      pfr += 2;
c52aaa
 	    }
c52aaa
 	  else
c52aaa
 	    {
c52aaa
-	      if (pfr < end_pfr)
c52aaa
+	      if (pfr < end_pfr && i < nfixedargs)
c52aaa
 		{
c52aaa
 		  /* Passed partly in f13 and partly on the stack.
c52aaa
 		     Move it all to the stack.  */
c52aaa
@@ -1495,5 +1792,14 @@ ffi_closure_helper_LINUX64 (ffi_closure
c52aaa
   (closure->fun) (cif, rvalue, avalue, closure->user_data);
c52aaa
 
c52aaa
   /* Tell ffi_closure_LINUX64 how to perform return type promotions.  */
c52aaa
+  if ((cif->flags & FLAG_RETURNS_SMST) != 0)
c52aaa
+    {
c52aaa
+      if ((cif->flags & FLAG_RETURNS_FP) == 0)
c52aaa
+	return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
c52aaa
+      else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
c52aaa
+	return FFI_V2_TYPE_DOUBLE_HOMOG;
c52aaa
+      else
c52aaa
+	return FFI_V2_TYPE_FLOAT_HOMOG;
c52aaa
+    }
c52aaa
   return cif->rtype->type;
c52aaa
 }
c52aaa
diff -urp libffi-3.0.13/src/powerpc/ffitarget.h libffi-current/src/powerpc/ffitarget.h
c52aaa
--- libffi-3.0.13/src/powerpc/ffitarget.h	2013-03-16 21:49:39.000000000 +1030
c52aaa
+++ libffi-current/src/powerpc/ffitarget.h	2013-11-17 09:07:45.433681274 +1030
c52aaa
@@ -106,6 +106,10 @@ typedef enum ffi_abi {
c52aaa
 
c52aaa
 #define FFI_CLOSURES 1
c52aaa
 #define FFI_NATIVE_RAW_API 0
c52aaa
+#if defined (POWERPC) || defined (POWERPC_FREEBSD)
c52aaa
+# define FFI_TARGET_SPECIFIC_VARIADIC 1
c52aaa
+# define FFI_EXTRA_CIF_FIELDS unsigned nfixedargs
c52aaa
+#endif
c52aaa
 
c52aaa
 /* For additional types like the below, take care about the order in
c52aaa
    ppc_closures.S. They must follow after the FFI_TYPE_LAST.  */
c52aaa
@@ -118,14 +122,23 @@ typedef enum ffi_abi {
c52aaa
    defined in ffi.c, to determine the exact return type and its size.  */
c52aaa
 #define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_TYPE_LAST + 2)
c52aaa
 
c52aaa
-#if defined(POWERPC64) || defined(POWERPC_AIX)
c52aaa
+/* Used by ELFv2 for homogeneous structure returns.  */
c52aaa
+#define FFI_V2_TYPE_FLOAT_HOMOG		(FFI_TYPE_LAST + 1)
c52aaa
+#define FFI_V2_TYPE_DOUBLE_HOMOG	(FFI_TYPE_LAST + 2)
c52aaa
+#define FFI_V2_TYPE_SMALL_STRUCT	(FFI_TYPE_LAST + 3)
c52aaa
+
c52aaa
+#if _CALL_ELF == 2
c52aaa
+# define FFI_TRAMPOLINE_SIZE 32
c52aaa
+#else
c52aaa
+# if defined(POWERPC64) || defined(POWERPC_AIX)
c52aaa
 #  if defined(POWERPC_DARWIN64)
c52aaa
 #    define FFI_TRAMPOLINE_SIZE 48
c52aaa
 #  else
c52aaa
 #    define FFI_TRAMPOLINE_SIZE 24
c52aaa
 #  endif
c52aaa
-#else /* POWERPC || POWERPC_AIX */
c52aaa
+# else /* POWERPC || POWERPC_AIX */
c52aaa
 #  define FFI_TRAMPOLINE_SIZE 40
c52aaa
+# endif
c52aaa
 #endif
c52aaa
 
c52aaa
 #ifndef LIBFFI_ASM
c52aaa
diff -urp libffi-3.0.13/src/powerpc/linux64_closure.S libffi-current/src/powerpc/linux64_closure.S
c52aaa
--- libffi-3.0.13/src/powerpc/linux64_closure.S	2013-03-16 21:49:39.000000000 +1030
c52aaa
+++ libffi-current/src/powerpc/linux64_closure.S	2013-11-17 09:11:54.267742403 +1030
c52aaa
@@ -33,15 +33,22 @@
c52aaa
 #ifdef __powerpc64__
c52aaa
 	FFI_HIDDEN (ffi_closure_LINUX64)
c52aaa
 	.globl  ffi_closure_LINUX64
c52aaa
+# if _CALL_ELF == 2
c52aaa
+	.text
c52aaa
+ffi_closure_LINUX64:
c52aaa
+	addis	%r2, %r12, .TOC.-ffi_closure_LINUX64@ha
c52aaa
+	addi	%r2, %r2, .TOC.-ffi_closure_LINUX64@l
c52aaa
+	.localentry ffi_closure_LINUX64, . - ffi_closure_LINUX64
c52aaa
+# else
c52aaa
 	.section        ".opd","aw"
c52aaa
 	.align  3
c52aaa
 ffi_closure_LINUX64:
c52aaa
-#ifdef _CALL_LINUX
c52aaa
+#  ifdef _CALL_LINUX
c52aaa
 	.quad   .L.ffi_closure_LINUX64,.TOC.@tocbase,0
c52aaa
 	.type   ffi_closure_LINUX64,@function
c52aaa
 	.text
c52aaa
 .L.ffi_closure_LINUX64:
c52aaa
-#else
c52aaa
+#  else
c52aaa
 	FFI_HIDDEN (.ffi_closure_LINUX64)
c52aaa
 	.globl  .ffi_closure_LINUX64
c52aaa
 	.quad   .ffi_closure_LINUX64,.TOC.@tocbase,0
c52aaa
@@ -49,61 +56,103 @@ ffi_closure_LINUX64:
c52aaa
 	.type   .ffi_closure_LINUX64,@function
c52aaa
 	.text
c52aaa
 .ffi_closure_LINUX64:
c52aaa
-#endif
c52aaa
+#  endif
c52aaa
+# endif
c52aaa
+
c52aaa
+# if _CALL_ELF == 2
c52aaa
+#  32 byte special reg save area + 64 byte parm save area and retval
c52aaa
+#  + 13*8 fpr save area + round to 16
c52aaa
+#  define STACKFRAME 208
c52aaa
+#  define PARMSAVE 32
c52aaa
+#  No parameter save area is needed for the call to ffi_closure_helper_LINUX64,
c52aaa
+#  so return value can start there.
c52aaa
+#  define RETVAL PARMSAVE
c52aaa
+# else
c52aaa
+#  48 bytes special reg save area + 64 bytes parm save area
c52aaa
+#  + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
c52aaa
+#  define STACKFRAME 240
c52aaa
+#  define PARMSAVE 48
c52aaa
+#  define RETVAL PARMSAVE+64
c52aaa
+# endif
c52aaa
+
c52aaa
 .LFB1:
c52aaa
-	# save general regs into parm save area
c52aaa
-	std	%r3, 48(%r1)
c52aaa
-	std	%r4, 56(%r1)
c52aaa
-	std	%r5, 64(%r1)
c52aaa
-	std	%r6, 72(%r1)
c52aaa
+# if _CALL_ELF == 2
c52aaa
+	ld	%r12, FFI_TRAMPOLINE_SIZE(%r11)		# closure->cif
c52aaa
+	mflr	%r0
c52aaa
+	lwz	%r12, 28(%r12)				# cif->flags
c52aaa
+	mtcrf	0x40, %r12
c52aaa
+	addi	%r12, %r1, PARMSAVE
c52aaa
+	bt	7, .Lparmsave
c52aaa
+	# Our caller has not allocated a parameter save area.
c52aaa
+	# We need to allocate one here and use it to pass gprs to
c52aaa
+	# ffi_closure_helper_LINUX64.  The return value area will do.
c52aaa
+	addi	%r12, %r1, -STACKFRAME+RETVAL
c52aaa
+.Lparmsave:
c52aaa
+	std	%r0, 16(%r1)
c52aaa
+	# Save general regs into parm save area
c52aaa
+	std	%r3, 0(%r12)
c52aaa
+	std	%r4, 8(%r12)
c52aaa
+	std	%r5, 16(%r12)
c52aaa
+	std	%r6, 24(%r12)
c52aaa
+	std	%r7, 32(%r12)
c52aaa
+	std	%r8, 40(%r12)
c52aaa
+	std	%r9, 48(%r12)
c52aaa
+	std	%r10, 56(%r12)
c52aaa
+
c52aaa
+	# load up the pointer to the parm save area
c52aaa
+	mr	%r5, %r12
c52aaa
+# else
c52aaa
 	mflr	%r0
c52aaa
+	# Save general regs into parm save area
c52aaa
+	# This is the parameter save area set up by our caller.
c52aaa
+	std	%r3, PARMSAVE+0(%r1)
c52aaa
+	std	%r4, PARMSAVE+8(%r1)
c52aaa
+	std	%r5, PARMSAVE+16(%r1)
c52aaa
+	std	%r6, PARMSAVE+24(%r1)
c52aaa
+	std	%r7, PARMSAVE+32(%r1)
c52aaa
+	std	%r8, PARMSAVE+40(%r1)
c52aaa
+	std	%r9, PARMSAVE+48(%r1)
c52aaa
+	std	%r10, PARMSAVE+56(%r1)
c52aaa
 
c52aaa
-	std	%r7, 80(%r1)
c52aaa
-	std	%r8, 88(%r1)
c52aaa
-	std	%r9, 96(%r1)
c52aaa
-	std	%r10, 104(%r1)
c52aaa
 	std	%r0, 16(%r1)
c52aaa
 
c52aaa
-	# mandatory 48 bytes special reg save area + 64 bytes parm save area
c52aaa
-	# + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
c52aaa
-	stdu	%r1, -240(%r1)
c52aaa
-.LCFI0:
c52aaa
+	# load up the pointer to the parm save area
c52aaa
+	addi	%r5, %r1, PARMSAVE
c52aaa
+# endif
c52aaa
 
c52aaa
 	# next save fpr 1 to fpr 13
c52aaa
-	stfd  %f1, 128+(0*8)(%r1)
c52aaa
-	stfd  %f2, 128+(1*8)(%r1)
c52aaa
-	stfd  %f3, 128+(2*8)(%r1)
c52aaa
-	stfd  %f4, 128+(3*8)(%r1)
c52aaa
-	stfd  %f5, 128+(4*8)(%r1)
c52aaa
-	stfd  %f6, 128+(5*8)(%r1)
c52aaa
-	stfd  %f7, 128+(6*8)(%r1)
c52aaa
-	stfd  %f8, 128+(7*8)(%r1)
c52aaa
-	stfd  %f9, 128+(8*8)(%r1)
c52aaa
-	stfd  %f10, 128+(9*8)(%r1)
c52aaa
-	stfd  %f11, 128+(10*8)(%r1)
c52aaa
-	stfd  %f12, 128+(11*8)(%r1)
c52aaa
-	stfd  %f13, 128+(12*8)(%r1)
c52aaa
+	stfd	%f1, -104+(0*8)(%r1)
c52aaa
+	stfd	%f2, -104+(1*8)(%r1)
c52aaa
+	stfd	%f3, -104+(2*8)(%r1)
c52aaa
+	stfd	%f4, -104+(3*8)(%r1)
c52aaa
+	stfd	%f5, -104+(4*8)(%r1)
c52aaa
+	stfd	%f6, -104+(5*8)(%r1)
c52aaa
+	stfd	%f7, -104+(6*8)(%r1)
c52aaa
+	stfd	%f8, -104+(7*8)(%r1)
c52aaa
+	stfd	%f9, -104+(8*8)(%r1)
c52aaa
+	stfd	%f10, -104+(9*8)(%r1)
c52aaa
+	stfd	%f11, -104+(10*8)(%r1)
c52aaa
+	stfd	%f12, -104+(11*8)(%r1)
c52aaa
+	stfd	%f13, -104+(12*8)(%r1)
c52aaa
 
c52aaa
-	# set up registers for the routine that actually does the work
c52aaa
-	# get the context pointer from the trampoline
c52aaa
-	mr %r3, %r11
c52aaa
+	# load up the pointer to the saved fpr registers
c52aaa
+	addi	%r6, %r1, -104
c52aaa
 
c52aaa
-	# now load up the pointer to the result storage
c52aaa
-	addi %r4, %r1, 112
c52aaa
+	# load up the pointer to the result storage
c52aaa
+	addi	%r4, %r1, -STACKFRAME+RETVAL
c52aaa
 
c52aaa
-	# now load up the pointer to the parameter save area
c52aaa
-	# in the previous frame
c52aaa
-	addi %r5, %r1, 240 + 48
c52aaa
+	stdu	%r1, -STACKFRAME(%r1)
c52aaa
+.LCFI0:
c52aaa
 
c52aaa
-	# now load up the pointer to the saved fpr registers */
c52aaa
-	addi %r6, %r1, 128
c52aaa
+	# get the context pointer from the trampoline
c52aaa
+	mr	%r3, %r11
c52aaa
 
c52aaa
 	# make the call
c52aaa
-#ifdef _CALL_LINUX
c52aaa
+# if defined _CALL_LINUX || _CALL_ELF == 2
c52aaa
 	bl ffi_closure_helper_LINUX64
c52aaa
-#else
c52aaa
+# else
c52aaa
 	bl .ffi_closure_helper_LINUX64
c52aaa
-#endif
c52aaa
+# endif
c52aaa
 .Lret:
c52aaa
 
c52aaa
 	# now r3 contains the return type
c52aaa
@@ -112,10 +161,12 @@ ffi_closure_LINUX64:
c52aaa
 
c52aaa
 	# look up the proper starting point in table
c52aaa
 	# by using return type as offset
c52aaa
+	ld %r0, STACKFRAME+16(%r1)
c52aaa
+	cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT
c52aaa
+	bge .Lsmall
c52aaa
 	mflr %r4		# move address of .Lret to r4
c52aaa
 	sldi %r3, %r3, 4	# now multiply return type by 16
c52aaa
 	addi %r4, %r4, .Lret_type0 - .Lret
c52aaa
-	ld %r0, 240+16(%r1)
c52aaa
 	add %r3, %r3, %r4	# add contents of table to table address
c52aaa
 	mtctr %r3
c52aaa
 	bctr			# jump to it
c52aaa
@@ -128,89 +179,175 @@ ffi_closure_LINUX64:
c52aaa
 .Lret_type0:
c52aaa
 # case FFI_TYPE_VOID
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 	nop
c52aaa
 # case FFI_TYPE_INT
c52aaa
-	lwa %r3, 112+4(%r1)
c52aaa
+# ifdef __LITTLE_ENDIAN__
c52aaa
+	lwa %r3, RETVAL+0(%r1)
c52aaa
+# else
c52aaa
+	lwa %r3, RETVAL+4(%r1)
c52aaa
+# endif
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 # case FFI_TYPE_FLOAT
c52aaa
-	lfs %f1, 112+0(%r1)
c52aaa
+	lfs %f1, RETVAL+0(%r1)
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 # case FFI_TYPE_DOUBLE
c52aaa
-	lfd %f1, 112+0(%r1)
c52aaa
+	lfd %f1, RETVAL+0(%r1)
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 # case FFI_TYPE_LONGDOUBLE
c52aaa
-	lfd %f1, 112+0(%r1)
c52aaa
+	lfd %f1, RETVAL+0(%r1)
c52aaa
 	mtlr %r0
c52aaa
-	lfd %f2, 112+8(%r1)
c52aaa
+	lfd %f2, RETVAL+8(%r1)
c52aaa
 	b .Lfinish
c52aaa
 # case FFI_TYPE_UINT8
c52aaa
-	lbz %r3, 112+7(%r1)
c52aaa
+# ifdef __LITTLE_ENDIAN__
c52aaa
+	lbz %r3, RETVAL+0(%r1)
c52aaa
+# else
c52aaa
+	lbz %r3, RETVAL+7(%r1)
c52aaa
+# endif
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 # case FFI_TYPE_SINT8
c52aaa
-	lbz %r3, 112+7(%r1)
c52aaa
+# ifdef __LITTLE_ENDIAN__
c52aaa
+	lbz %r3, RETVAL+0(%r1)
c52aaa
+# else
c52aaa
+	lbz %r3, RETVAL+7(%r1)
c52aaa
+# endif
c52aaa
 	extsb %r3,%r3
c52aaa
 	mtlr %r0
c52aaa
 	b .Lfinish
c52aaa
 # case FFI_TYPE_UINT16
c52aaa
-	lhz %r3, 112+6(%r1)
c52aaa
+# ifdef __LITTLE_ENDIAN__
c52aaa
+	lhz %r3, RETVAL+0(%r1)
c52aaa
+# else
c52aaa
+	lhz %r3, RETVAL+6(%r1)
c52aaa
+# endif
c52aaa
 	mtlr %r0
c52aaa
 .Lfinish:
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 # case FFI_TYPE_SINT16
c52aaa
-	lha %r3, 112+6(%r1)
c52aaa
+# ifdef __LITTLE_ENDIAN__
c52aaa
+	lha %r3, RETVAL+0(%r1)
c52aaa
+# else
c52aaa
+	lha %r3, RETVAL+6(%r1)
c52aaa
+# endif
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 # case FFI_TYPE_UINT32
c52aaa
-	lwz %r3, 112+4(%r1)
c52aaa
+# ifdef __LITTLE_ENDIAN__
c52aaa
+	lwz %r3, RETVAL+0(%r1)
c52aaa
+# else
c52aaa
+	lwz %r3, RETVAL+4(%r1)
c52aaa
+# endif
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 # case FFI_TYPE_SINT32
c52aaa
-	lwa %r3, 112+4(%r1)
c52aaa
+# ifdef __LITTLE_ENDIAN__
c52aaa
+	lwa %r3, RETVAL+0(%r1)
c52aaa
+# else
c52aaa
+	lwa %r3, RETVAL+4(%r1)
c52aaa
+# endif
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 # case FFI_TYPE_UINT64
c52aaa
-	ld %r3, 112+0(%r1)
c52aaa
+	ld %r3, RETVAL+0(%r1)
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 # case FFI_TYPE_SINT64
c52aaa
-	ld %r3, 112+0(%r1)
c52aaa
+	ld %r3, RETVAL+0(%r1)
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 # case FFI_TYPE_STRUCT
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
 	nop
c52aaa
 # case FFI_TYPE_POINTER
c52aaa
-	ld %r3, 112+0(%r1)
c52aaa
+	ld %r3, RETVAL+0(%r1)
c52aaa
 	mtlr %r0
c52aaa
-	addi %r1, %r1, 240
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
 	blr
c52aaa
-# esac
c52aaa
+# case FFI_V2_TYPE_FLOAT_HOMOG
c52aaa
+	lfs %f1, RETVAL+0(%r1)
c52aaa
+	lfs %f2, RETVAL+4(%r1)
c52aaa
+	lfs %f3, RETVAL+8(%r1)
c52aaa
+	b .Lmorefloat
c52aaa
+# case FFI_V2_TYPE_DOUBLE_HOMOG
c52aaa
+	lfd %f1, RETVAL+0(%r1)
c52aaa
+	lfd %f2, RETVAL+8(%r1)
c52aaa
+	lfd %f3, RETVAL+16(%r1)
c52aaa
+	lfd %f4, RETVAL+24(%r1)
c52aaa
+	mtlr %r0
c52aaa
+	lfd %f5, RETVAL+32(%r1)
c52aaa
+	lfd %f6, RETVAL+40(%r1)
c52aaa
+	lfd %f7, RETVAL+48(%r1)
c52aaa
+	lfd %f8, RETVAL+56(%r1)
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
+	blr
c52aaa
+.Lmorefloat:
c52aaa
+	lfs %f4, RETVAL+12(%r1)
c52aaa
+	mtlr %r0
c52aaa
+	lfs %f5, RETVAL+16(%r1)
c52aaa
+	lfs %f6, RETVAL+20(%r1)
c52aaa
+	lfs %f7, RETVAL+24(%r1)
c52aaa
+	lfs %f8, RETVAL+28(%r1)
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
+	blr
c52aaa
+.Lsmall:
c52aaa
+# ifdef __LITTLE_ENDIAN__
c52aaa
+	ld %r3,RETVAL+0(%r1)
c52aaa
+	mtlr %r0
c52aaa
+	ld %r4,RETVAL+8(%r1)
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
+	blr
c52aaa
+# else
c52aaa
+	# A struct smaller than a dword is returned in the low bits of r3
c52aaa
+	# i.e. right justified.  Larger structs are returned left justified
c52aaa
+	# in r3 and r4.  The return value area on the stack will have
c52aaa
+	# the structs as they are usually stored in memory.
c52aaa
+	cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT + 7 # size 8 bytes?
c52aaa
+	neg %r5, %r3
c52aaa
+	ld %r3,RETVAL+0(%r1)
c52aaa
+	blt .Lsmalldown
c52aaa
+	mtlr %r0
c52aaa
+	ld %r4,RETVAL+8(%r1)
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
+	blr
c52aaa
+.Lsmalldown:
c52aaa
+	addi %r5, %r5, FFI_V2_TYPE_SMALL_STRUCT + 7
c52aaa
+	mtlr %r0
c52aaa
+	sldi %r5, %r5, 3
c52aaa
+	addi %r1, %r1, STACKFRAME
c52aaa
+	srd %r3, %r3, %r5
c52aaa
+	blr
c52aaa
+# endif
c52aaa
+
c52aaa
 .LFE1:
c52aaa
 	.long	0
c52aaa
 	.byte	0,12,0,1,128,0,0,0
c52aaa
-#ifdef _CALL_LINUX
c52aaa
+# if _CALL_ELF == 2
c52aaa
+	.size	ffi_closure_LINUX64,.-ffi_closure_LINUX64
c52aaa
+# else
c52aaa
+#  ifdef _CALL_LINUX
c52aaa
 	.size	ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64
c52aaa
-#else
c52aaa
+#  else
c52aaa
 	.size	.ffi_closure_LINUX64,.-.ffi_closure_LINUX64
c52aaa
-#endif
c52aaa
+#  endif
c52aaa
+# endif
c52aaa
 
c52aaa
 	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
c52aaa
 .Lframe1:
c52aaa
@@ -239,14 +376,14 @@ ffi_closure_LINUX64:
c52aaa
 	.byte	0x2	 # DW_CFA_advance_loc1
c52aaa
 	.byte	.LCFI0-.LFB1
c52aaa
 	.byte	0xe	 # DW_CFA_def_cfa_offset
c52aaa
-	.uleb128 240
c52aaa
+	.uleb128 STACKFRAME
c52aaa
 	.byte	0x11	 # DW_CFA_offset_extended_sf
c52aaa
 	.uleb128 0x41
c52aaa
 	.sleb128 -2
c52aaa
 	.align 3
c52aaa
 .LEFDE1:
c52aaa
-#endif
c52aaa
 
c52aaa
-#if defined __ELF__ && defined __linux__
c52aaa
+# if defined __ELF__ && defined __linux__
c52aaa
 	.section	.note.GNU-stack,"",@progbits
c52aaa
+# endif
c52aaa
 #endif
c52aaa
diff -urp libffi-3.0.13/src/powerpc/linux64.S libffi-current/src/powerpc/linux64.S
c52aaa
--- libffi-3.0.13/src/powerpc/linux64.S	2013-03-16 21:49:39.000000000 +1030
c52aaa
+++ libffi-current/src/powerpc/linux64.S	2013-11-17 09:09:09.742314090 +1030
c52aaa
@@ -32,15 +32,22 @@
c52aaa
 #ifdef __powerpc64__
c52aaa
 	.hidden	ffi_call_LINUX64
c52aaa
 	.globl	ffi_call_LINUX64
c52aaa
+# if _CALL_ELF == 2
c52aaa
+	.text
c52aaa
+ffi_call_LINUX64:
c52aaa
+	addis	%r2, %r12, .TOC.-ffi_call_LINUX64@ha
c52aaa
+	addi	%r2, %r2, .TOC.-ffi_call_LINUX64@l
c52aaa
+	.localentry ffi_call_LINUX64, . - ffi_call_LINUX64
c52aaa
+# else
c52aaa
 	.section	".opd","aw"
c52aaa
 	.align	3
c52aaa
 ffi_call_LINUX64:
c52aaa
-#ifdef _CALL_LINUX
c52aaa
+#  ifdef _CALL_LINUX
c52aaa
 	.quad	.L.ffi_call_LINUX64,.TOC.@tocbase,0
c52aaa
 	.type	ffi_call_LINUX64,@function
c52aaa
 	.text
c52aaa
 .L.ffi_call_LINUX64:
c52aaa
-#else
c52aaa
+#  else
c52aaa
 	.hidden	.ffi_call_LINUX64
c52aaa
 	.globl	.ffi_call_LINUX64
c52aaa
 	.quad	.ffi_call_LINUX64,.TOC.@tocbase,0
c52aaa
@@ -48,7 +55,8 @@ ffi_call_LINUX64:
c52aaa
 	.type	.ffi_call_LINUX64,@function
c52aaa
 	.text
c52aaa
 .ffi_call_LINUX64:
c52aaa
-#endif
c52aaa
+#  endif
c52aaa
+# endif
c52aaa
 .LFB1:
c52aaa
 	mflr	%r0
c52aaa
 	std	%r28, -32(%r1)
c52aaa
@@ -63,26 +71,35 @@ ffi_call_LINUX64:
c52aaa
 	mr	%r31, %r5	/* flags, */
c52aaa
 	mr	%r30, %r6	/* rvalue, */
c52aaa
 	mr	%r29, %r7	/* function address.  */
c52aaa
+/* Save toc pointer, not for the ffi_prep_args64 call, but for the later
c52aaa
+   bctrl function call.  */
c52aaa
+# if _CALL_ELF == 2
c52aaa
+	std	%r2, 24(%r1)
c52aaa
+# else
c52aaa
 	std	%r2, 40(%r1)
c52aaa
+# endif
c52aaa
 
c52aaa
 	/* Call ffi_prep_args64.  */
c52aaa
 	mr	%r4, %r1
c52aaa
-#ifdef _CALL_LINUX
c52aaa
+# if defined _CALL_LINUX || _CALL_ELF == 2
c52aaa
 	bl	ffi_prep_args64
c52aaa
-#else
c52aaa
+# else
c52aaa
 	bl	.ffi_prep_args64
c52aaa
-#endif
c52aaa
+# endif
c52aaa
 
c52aaa
-	ld	%r0, 0(%r29)
c52aaa
+# if _CALL_ELF == 2
c52aaa
+	mr	%r12, %r29
c52aaa
+# else
c52aaa
+	ld	%r12, 0(%r29)
c52aaa
 	ld	%r2, 8(%r29)
c52aaa
 	ld	%r11, 16(%r29)
c52aaa
-
c52aaa
+# endif
c52aaa
 	/* Now do the call.  */
c52aaa
 	/* Set up cr1 with bits 4-7 of the flags.  */
c52aaa
 	mtcrf	0x40, %r31
c52aaa
 
c52aaa
 	/* Get the address to call into CTR.  */
c52aaa
-	mtctr	%r0
c52aaa
+	mtctr	%r12
c52aaa
 	/* Load all those argument registers.  */
c52aaa
 	ld	%r3, -32-(8*8)(%r28)
c52aaa
 	ld	%r4, -32-(7*8)(%r28)
c52aaa
@@ -117,12 +134,17 @@ ffi_call_LINUX64:
c52aaa
 
c52aaa
 	/* This must follow the call immediately, the unwinder
c52aaa
 	   uses this to find out if r2 has been saved or not.  */
c52aaa
+# if _CALL_ELF == 2
c52aaa
+	ld	%r2, 24(%r1)
c52aaa
+# else
c52aaa
 	ld	%r2, 40(%r1)
c52aaa
+# endif
c52aaa
 
c52aaa
 	/* Now, deal with the return value.  */
c52aaa
 	mtcrf	0x01, %r31
c52aaa
-	bt-	30, .Ldone_return_value
c52aaa
-	bt-	29, .Lfp_return_value
c52aaa
+	bt	31, .Lstruct_return_value
c52aaa
+	bt	30, .Ldone_return_value
c52aaa
+	bt	29, .Lfp_return_value
c52aaa
 	std	%r3, 0(%r30)
c52aaa
 	/* Fall through...  */
c52aaa
 
c52aaa
@@ -130,7 +152,7 @@ ffi_call_LINUX64:
c52aaa
 	/* Restore the registers we used and return.  */
c52aaa
 	mr	%r1, %r28
c52aaa
 	ld	%r0, 16(%r28)
c52aaa
-	ld	%r28, -32(%r1)
c52aaa
+	ld	%r28, -32(%r28)
c52aaa
 	mtlr	%r0
c52aaa
 	ld	%r29, -24(%r1)
c52aaa
 	ld	%r30, -16(%r1)
c52aaa
@@ -147,14 +169,48 @@ ffi_call_LINUX64:
c52aaa
 .Lfloat_return_value:
c52aaa
 	stfs	%f1, 0(%r30)
c52aaa
 	b	.Ldone_return_value
c52aaa
+
c52aaa
+.Lstruct_return_value:
c52aaa
+	bf	29, .Lsmall_struct
c52aaa
+	bf	28, .Lfloat_homog_return_value
c52aaa
+	stfd	%f1, 0(%r30)
c52aaa
+	stfd	%f2, 8(%r30)
c52aaa
+	stfd	%f3, 16(%r30)
c52aaa
+	stfd	%f4, 24(%r30)
c52aaa
+	stfd	%f5, 32(%r30)
c52aaa
+	stfd	%f6, 40(%r30)
c52aaa
+	stfd	%f7, 48(%r30)
c52aaa
+	stfd	%f8, 56(%r30)
c52aaa
+	b	.Ldone_return_value
c52aaa
+
c52aaa
+.Lfloat_homog_return_value:
c52aaa
+	stfs	%f1, 0(%r30)
c52aaa
+	stfs	%f2, 4(%r30)
c52aaa
+	stfs	%f3, 8(%r30)
c52aaa
+	stfs	%f4, 12(%r30)
c52aaa
+	stfs	%f5, 16(%r30)
c52aaa
+	stfs	%f6, 20(%r30)
c52aaa
+	stfs	%f7, 24(%r30)
c52aaa
+	stfs	%f8, 28(%r30)
c52aaa
+	b	.Ldone_return_value
c52aaa
+
c52aaa
+.Lsmall_struct:
c52aaa
+	std	%r3, 0(%r30)
c52aaa
+	std	%r4, 8(%r30)
c52aaa
+	b	.Ldone_return_value
c52aaa
+
c52aaa
 .LFE1:
c52aaa
 	.long	0
c52aaa
 	.byte	0,12,0,1,128,4,0,0
c52aaa
-#ifdef _CALL_LINUX
c52aaa
+# if _CALL_ELF == 2
c52aaa
+	.size	ffi_call_LINUX64,.-ffi_call_LINUX64
c52aaa
+# else
c52aaa
+#  ifdef _CALL_LINUX
c52aaa
 	.size	ffi_call_LINUX64,.-.L.ffi_call_LINUX64
c52aaa
-#else
c52aaa
+#  else
c52aaa
 	.size	.ffi_call_LINUX64,.-.ffi_call_LINUX64
c52aaa
-#endif
c52aaa
+#  endif
c52aaa
+# endif
c52aaa
 
c52aaa
 	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
c52aaa
 .Lframe1:
c52aaa
@@ -197,8 +253,8 @@ ffi_call_LINUX64:
c52aaa
 	.uleb128 0x4
c52aaa
 	.align 3
c52aaa
 .LEFDE1:
c52aaa
-#endif
c52aaa
 
c52aaa
-#if defined __ELF__ && defined __linux__
c52aaa
+# if (defined __ELF__ && defined __linux__) || _CALL_ELF == 2
c52aaa
 	.section	.note.GNU-stack,"",@progbits
c52aaa
+# endif
c52aaa
 #endif
c52aaa
diff -urp libffi-3.0.13/src/powerpc/ppc_closure.S libffi-current/src/powerpc/ppc_closure.S
c52aaa
--- libffi-3.0.13/src/powerpc/ppc_closure.S	2013-03-16 21:49:39.000000000 +1030
c52aaa
+++ libffi-current/src/powerpc/ppc_closure.S	2013-11-17 13:06:22.569393369 +1030
c52aaa
@@ -159,25 +159,41 @@ ENTRY(ffi_closure_SYSV)
c52aaa
 #endif
c52aaa
 
c52aaa
 # case FFI_TYPE_UINT8
c52aaa
+#ifdef __LITTLE_ENDIAN__
c52aaa
+	lbz %r3,112+0(%r1)
c52aaa
+#else
c52aaa
 	lbz %r3,112+3(%r1)
c52aaa
+#endif
c52aaa
 	mtlr %r0
c52aaa
 	addi %r1,%r1,144
c52aaa
 	blr
c52aaa
 
c52aaa
 # case FFI_TYPE_SINT8
c52aaa
+#ifdef __LITTLE_ENDIAN__
c52aaa
+	lbz %r3,112+0(%r1)
c52aaa
+#else
c52aaa
 	lbz %r3,112+3(%r1)
c52aaa
+#endif
c52aaa
 	extsb %r3,%r3
c52aaa
 	mtlr %r0
c52aaa
 	b .Lfinish
c52aaa
 
c52aaa
 # case FFI_TYPE_UINT16
c52aaa
+#ifdef __LITTLE_ENDIAN__
c52aaa
+	lhz %r3,112+0(%r1)
c52aaa
+#else
c52aaa
 	lhz %r3,112+2(%r1)
c52aaa
+#endif
c52aaa
 	mtlr %r0
c52aaa
 	addi %r1,%r1,144
c52aaa
 	blr
c52aaa
 
c52aaa
 # case FFI_TYPE_SINT16
c52aaa
+#ifdef __LITTLE_ENDIAN__
c52aaa
+	lha %r3,112+0(%r1)
c52aaa
+#else
c52aaa
 	lha %r3,112+2(%r1)
c52aaa
+#endif
c52aaa
 	mtlr %r0
c52aaa
 	addi %r1,%r1,144
c52aaa
 	blr
c52aaa
@@ -222,7 +238,7 @@ ENTRY(ffi_closure_SYSV)
c52aaa
 	lwz %r3,112+0(%r1)
c52aaa
 	lwz %r4,112+4(%r1)
c52aaa
 	lwz %r5,112+8(%r1)
c52aaa
-	bl .Luint128
c52aaa
+	b .Luint128
c52aaa
 
c52aaa
 # The return types below are only used when the ABI type is FFI_SYSV.
c52aaa
 # case FFI_SYSV_TYPE_SMALL_STRUCT + 1. One byte struct.
c52aaa
@@ -239,9 +255,15 @@ ENTRY(ffi_closure_SYSV)
c52aaa
 
c52aaa
 # case FFI_SYSV_TYPE_SMALL_STRUCT + 3. Three byte struct.
c52aaa
 	lwz %r3,112+0(%r1)
c52aaa
+#ifdef __LITTLE_ENDIAN__
c52aaa
+	mtlr %r0
c52aaa
+	addi %r1,%r1,144
c52aaa
+	blr
c52aaa
+#else
c52aaa
 	srwi %r3,%r3,8
c52aaa
 	mtlr %r0
c52aaa
 	b .Lfinish
c52aaa
+#endif
c52aaa
 
c52aaa
 # case FFI_SYSV_TYPE_SMALL_STRUCT + 4. Four byte struct.
c52aaa
 	lwz %r3,112+0(%r1)
c52aaa
@@ -252,20 +274,35 @@ ENTRY(ffi_closure_SYSV)
c52aaa
 # case FFI_SYSV_TYPE_SMALL_STRUCT + 5. Five byte struct.
c52aaa
 	lwz %r3,112+0(%r1)
c52aaa
 	lwz %r4,112+4(%r1)
c52aaa
+#ifdef __LITTLE_ENDIAN__
c52aaa
+	mtlr %r0
c52aaa
+	b .Lfinish
c52aaa
+#else
c52aaa
 	li %r5,24
c52aaa
 	b .Lstruct567
c52aaa
+#endif
c52aaa
 
c52aaa
 # case FFI_SYSV_TYPE_SMALL_STRUCT + 6. Six byte struct.
c52aaa
 	lwz %r3,112+0(%r1)
c52aaa
 	lwz %r4,112+4(%r1)
c52aaa
+#ifdef __LITTLE_ENDIAN__
c52aaa
+	mtlr %r0
c52aaa
+	b .Lfinish
c52aaa
+#else
c52aaa
 	li %r5,16
c52aaa
 	b .Lstruct567
c52aaa
+#endif
c52aaa
 
c52aaa
 # case FFI_SYSV_TYPE_SMALL_STRUCT + 7. Seven byte struct.
c52aaa
 	lwz %r3,112+0(%r1)
c52aaa
 	lwz %r4,112+4(%r1)
c52aaa
+#ifdef __LITTLE_ENDIAN__
c52aaa
+	mtlr %r0
c52aaa
+	b .Lfinish
c52aaa
+#else
c52aaa
 	li %r5,8
c52aaa
 	b .Lstruct567
c52aaa
+#endif
c52aaa
 
c52aaa
 # case FFI_SYSV_TYPE_SMALL_STRUCT + 8. Eight byte struct.
c52aaa
 	lwz %r3,112+0(%r1)
c52aaa
@@ -273,6 +310,7 @@ ENTRY(ffi_closure_SYSV)
c52aaa
 	mtlr %r0
c52aaa
 	b .Lfinish
c52aaa
 
c52aaa
+#ifndef __LITTLE_ENDIAN__
c52aaa
 .Lstruct567:
c52aaa
 	subfic %r6,%r5,32
c52aaa
 	srw %r4,%r4,%r5
c52aaa
@@ -282,6 +320,7 @@ ENTRY(ffi_closure_SYSV)
c52aaa
 	mtlr %r0
c52aaa
 	addi %r1,%r1,144
c52aaa
 	blr
c52aaa
+#endif
c52aaa
 
c52aaa
 .Luint128:
c52aaa
 	lwz %r6,112+12(%r1)
c52aaa
diff -urp libffi-3.0.13/src/powerpc/sysv.S libffi-current/src/powerpc/sysv.S
c52aaa
--- libffi-3.0.13/src/powerpc/sysv.S	2013-03-16 21:49:39.000000000 +1030
c52aaa
+++ libffi-current/src/powerpc/sysv.S	2013-11-13 22:36:35.222994628 +1030
c52aaa
@@ -142,19 +142,14 @@ L(float_return_value):
c52aaa
 #endif
c52aaa
 
c52aaa
 L(small_struct_return_value):
c52aaa
-	extrwi	%r6,%r31,2,19         /* number of bytes padding = shift/8 */
c52aaa
-	mtcrf	0x02,%r31	      /* copy flags to cr[24:27] (cr6) */
c52aaa
-	extrwi	%r5,%r31,5,19         /* r5 <- number of bits of padding */
c52aaa
-	subfic  %r6,%r6,4             /* r6 <- number of useful bytes in r3 */
c52aaa
-	bf-	25,L(done_return_value) /* struct in r3 ? if not, done. */
c52aaa
-/* smst_one_register: */
c52aaa
-	slw	%r3,%r3,%r5           /* Left-justify value in r3 */
c52aaa
-	mtxer	%r6                   /* move byte count to XER ... */
c52aaa
-	stswx	%r3,0,%r30            /* ... and store that many bytes */
c52aaa
-	bf+	26,L(done_return_value)  /* struct in r3:r4 ? */
c52aaa
-	add	%r6,%r6,%r30          /* adjust pointer */
c52aaa
-	stswi	%r4,%r6,4             /* store last four bytes */
c52aaa
-	b	L(done_return_value)
c52aaa
+	/*
c52aaa
+	 * The C code always allocates a properly-aligned 8-byte bounce
c52aaa
+	 * buffer to make this assembly code very simple.  Just write out
c52aaa
+	 * r3 and r4 to the buffer to allow the C code to handle the rest.
c52aaa
+	 */
c52aaa
+	stw %r3, 0(%r30)
c52aaa
+	stw %r4, 4(%r30)
c52aaa
+	b L(done_return_value)
c52aaa
 
c52aaa
 .LFE1:
c52aaa
 END(ffi_call_SYSV)
c52aaa
diff -urp libffi-3.0.13/testsuite/libffi.call/cls_double_va.c libffi-current/testsuite/libffi.call/cls_double_va.c
c52aaa
--- libffi-3.0.13/testsuite/libffi.call/cls_double_va.c	2013-03-16 21:49:39.000000000 +1030
c52aaa
+++ libffi-current/testsuite/libffi.call/cls_double_va.c	2013-11-13 22:37:13.437459229 +1030
c52aaa
@@ -38,7 +38,7 @@ int main (void)
c52aaa
 
c52aaa
 	/* This printf call is variadic */
c52aaa
 	CHECK(ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint,
c52aaa
-		arg_types) == FFI_OK);
c52aaa
+			       arg_types) == FFI_OK);
c52aaa
 
c52aaa
 	args[0] = &format;
c52aaa
 	args[1] = &doubleArg;
c52aaa
@@ -49,12 +49,10 @@ int main (void)
c52aaa
 	printf("res: %d\n", (int) res);
c52aaa
 	/* { dg-output "\nres: 4" } */
c52aaa
 
c52aaa
-	/* The call to cls_double_va_fn is static, so have to use a normal prep_cif */
c52aaa
-	CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint, arg_types) == FFI_OK);
c52aaa
+	CHECK(ffi_prep_closure_loc(pcl, &cif, cls_double_va_fn, NULL,
c52aaa
+				   code) == FFI_OK);
c52aaa
 
c52aaa
-	CHECK(ffi_prep_closure_loc(pcl, &cif, cls_double_va_fn, NULL, code) == FFI_OK);
c52aaa
-
c52aaa
-	res	= ((int(*)(char*, double))(code))(format, doubleArg);
c52aaa
+	res = ((int(*)(char*, ...))(code))(format, doubleArg);
c52aaa
 	/* { dg-output "\n7.0" } */
c52aaa
 	printf("res: %d\n", (int) res);
c52aaa
 	/* { dg-output "\nres: 4" } */
c52aaa
diff -urp libffi-3.0.13/testsuite/libffi.call/cls_longdouble_va.c libffi-current/testsuite/libffi.call/cls_longdouble_va.c
c52aaa
--- libffi-3.0.13/testsuite/libffi.call/cls_longdouble_va.c	2013-03-16 21:49:39.000000000 +1030
c52aaa
+++ libffi-current/testsuite/libffi.call/cls_longdouble_va.c	2013-11-13 22:37:13.437459229 +1030
c52aaa
@@ -38,7 +38,7 @@ int main (void)
c52aaa
 
c52aaa
 	/* This printf call is variadic */
c52aaa
 	CHECK(ffi_prep_cif_var(&cif, FFI_DEFAULT_ABI, 1, 2, &ffi_type_sint,
c52aaa
-		arg_types) == FFI_OK);
c52aaa
+			       arg_types) == FFI_OK);
c52aaa
 
c52aaa
 	args[0] = &format;
c52aaa
 	args[1] = &ldArg;
c52aaa
@@ -49,13 +49,10 @@ int main (void)
c52aaa
 	printf("res: %d\n", (int) res);
c52aaa
 	/* { dg-output "\nres: 4" } */
c52aaa
 
c52aaa
-	/* The call to cls_longdouble_va_fn is static, so have to use a normal prep_cif */
c52aaa
-	CHECK(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint,
c52aaa
-		arg_types) == FFI_OK);
c52aaa
+	CHECK(ffi_prep_closure_loc(pcl, &cif, cls_longdouble_va_fn, NULL,
c52aaa
+				   code) == FFI_OK);
c52aaa
 
c52aaa
-	CHECK(ffi_prep_closure_loc(pcl, &cif, cls_longdouble_va_fn, NULL, code) == FFI_OK);
c52aaa
-
c52aaa
-	res	= ((int(*)(char*, long double))(code))(format, ldArg);
c52aaa
+	res = ((int(*)(char*, ...))(code))(format, ldArg);
c52aaa
 	/* { dg-output "\n7.0" } */
c52aaa
 	printf("res: %d\n", (int) res);
c52aaa
 	/* { dg-output "\nres: 4" } */
c52aaa
diff -urp libffi-3.0.13/doc/libffi.texi libffi-current/doc/libffi.texi
c52aaa
--- libffi-3.0.13/doc/libffi.texi	2013-03-16 22:41:19.000000000 +1030
c52aaa
+++ libffi-current/doc/libffi.texi	2013-11-17 09:06:03.209763612 +1030
c52aaa
@@ -184,11 +184,11 @@ This calls the function @var{fn} accordi
c52aaa
 
c52aaa
 @var{rvalue} is a pointer to a chunk of memory that will hold the
c52aaa
 result of the function call.  This must be large enough to hold the
c52aaa
-result and must be suitably aligned; it is the caller's responsibility
c52aaa
+result, no smaller than the system register size (generally 32 or 64
c52aaa
+bits), and must be suitably aligned; it is the caller's responsibility
c52aaa
 to ensure this.  If @var{cif} declares that the function returns
c52aaa
 @code{void} (using @code{ffi_type_void}), then @var{rvalue} is
c52aaa
-ignored.  If @var{rvalue} is @samp{NULL}, then the return value is
c52aaa
-discarded.
c52aaa
+ignored.
c52aaa
 
c52aaa
 @var{avalues} is a vector of @code{void *} pointers that point to the
c52aaa
 memory locations holding the argument values for a call.  If @var{cif}
c52aaa
@@ -214,7 +214,7 @@ int main()
c52aaa
   ffi_type *args[1];
c52aaa
   void *values[1];
c52aaa
   char *s;
c52aaa
-  int rc;
c52aaa
+  ffi_arg rc;
c52aaa
   
c52aaa
   /* Initialize the argument info vectors */    
c52aaa
   args[0] = &ffi_type_pointer;
c52aaa
@@ -222,7 +222,7 @@ int main()
c52aaa
   
c52aaa
   /* Initialize the cif */
c52aaa
   if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, 
c52aaa
-		       &ffi_type_uint, args) == FFI_OK)
c52aaa
+		       &ffi_type_sint, args) == FFI_OK)
c52aaa
     @{
c52aaa
       s = "Hello World!";
c52aaa
       ffi_call(&cif, puts, &rc, values);
c52aaa
@@ -414,6 +414,7 @@ Here is the corresponding code to descri
c52aaa
       int i;
c52aaa
 
c52aaa
       tm_type.size = tm_type.alignment = 0;
c52aaa
+      tm_type.type = FFI_TYPE_STRUCT;
c52aaa
       tm_type.elements = &tm_type_elements;
c52aaa
     
c52aaa
       for (i = 0; i < 9; i++)
c52aaa
@@ -540,21 +541,23 @@ A trivial example that creates a new @co
c52aaa
 #include <ffi.h>
c52aaa
 
c52aaa
 /* Acts like puts with the file given at time of enclosure. */
c52aaa
-void puts_binding(ffi_cif *cif, unsigned int *ret, void* args[], 
c52aaa
-                  FILE *stream)
c52aaa
+void puts_binding(ffi_cif *cif, void *ret, void* args[],
c52aaa
+                  void *stream)
c52aaa
 @{
c52aaa
-  *ret = fputs(*(char **)args[0], stream);
c52aaa
+  *(ffi_arg *)ret = fputs(*(char **)args[0], (FILE *)stream);
c52aaa
 @}
c52aaa
 
c52aaa
+typedef int (*puts_t)(char *);
c52aaa
+
c52aaa
 int main()
c52aaa
 @{
c52aaa
   ffi_cif cif;
c52aaa
   ffi_type *args[1];
c52aaa
   ffi_closure *closure;
c52aaa
 
c52aaa
-  int (*bound_puts)(char *);
c52aaa
+  void *bound_puts;
c52aaa
   int rc;
c52aaa
-  
c52aaa
+
c52aaa
   /* Allocate closure and bound_puts */
c52aaa
   closure = ffi_closure_alloc(sizeof(ffi_closure), &bound_puts);
c52aaa
 
c52aaa
@@ -565,13 +568,13 @@ int main()
c52aaa
 
c52aaa
       /* Initialize the cif */
c52aaa
       if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1,
c52aaa
-                       &ffi_type_uint, args) == FFI_OK)
c52aaa
+                       &ffi_type_sint, args) == FFI_OK)
c52aaa
         @{
c52aaa
           /* Initialize the closure, setting stream to stdout */
c52aaa
-          if (ffi_prep_closure_loc(closure, &cif, puts_binding, 
c52aaa
+          if (ffi_prep_closure_loc(closure, &cif, puts_binding,
c52aaa
                                    stdout, bound_puts) == FFI_OK)
c52aaa
             @{
c52aaa
-              rc = bound_puts("Hello World!");
c52aaa
+              rc = ((puts_t)bound_puts)("Hello World!");
c52aaa
               /* rc now holds the result of the call to fputs */
c52aaa
             @}
c52aaa
         @}