24d066
Correct vec_perm() application on little-endian 64-bit PowerPC
24d066
24d066
The LE transformation for vec_perm has an implicit assumption that the
24d066
permutation is being used to reorder vector elements (in this case 4-byte
24d066
integer word elements), not to reorder bytes within those elements.  Although
24d066
this is legal behavior, it is not anticipated by the transformation performed
24d066
by the compilers.
24d066
24d066
This causes a pygame-1.9.1 test failure on PPC64LE because blitted pixmaps are
24d066
corrupted there due to how SDL uses vec_perm().
24d066
24d066
<https://bugzilla.redhat.com/show_bug.cgi?id=1392465>
24d066
24d066
--- SDL-1.2.15/src/video/SDL_blit_N.c.ori	2017-09-04 05:56:17.759347525 -0400
24d066
+++ SDL-1.2.15/src/video/SDL_blit_N.c	2017-09-06 05:36:20.570789610 -0400
24d066
@@ -146,6 +146,32 @@ static vector unsigned char calc_swizzle
24d066
     return(vswiz);
24d066
 }
24d066
 
24d066
+/* reorder bytes for PowerPC little endian */
24d066
+static vector unsigned char reorder_ppc64le_vec(vector unsigned char vpermute)
24d066
+{
24d066
+    /* The result vector of calc_swizzle32 is used by vec_perm to reorder bytes.
24d066
+       The LE transformation for vec_perm has an implicit assumption
24d066
+       that the permutation is being used to reorder vector elements,
24d066
+       not to reorder bytes within those elements.  
24d066
+       Unfortunately the result order is not the expected one for powerpc
24d066
+       little endian when the two first vector parameters of vec_perm are
24d066
+       not of type 'vector char'. This is because the numbering from the
24d066
+       left for BE, and numbering from the right for LE, produces a
24d066
+       different interpretation of what the odd and even lanes are.
24d066
+       Refer to fedora bug 1392465
24d066
+     */
24d066
+
24d066
+    const vector unsigned char ppc64le_reorder = VECUINT8_LITERAL(
24d066
+                                      0x01, 0x00, 0x03, 0x02,
24d066
+                                      0x05, 0x04, 0x07, 0x06,
24d066
+                                      0x09, 0x08, 0x0B, 0x0A,
24d066
+                                      0x0D, 0x0C, 0x0F, 0x0E );
24d066
+
24d066
+    vector unsigned char vswiz_ppc64le;
24d066
+    vswiz_ppc64le = vec_perm(vpermute, vpermute, ppc64le_reorder);
24d066
+    return(vswiz_ppc64le);
24d066
+}
24d066
+
24d066
 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
24d066
 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
24d066
     int height = info->d_height;
24d066
@@ -631,6 +657,12 @@ static void Blit32to32KeyAltivec(SDL_Bli
24d066
                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
24d066
                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
24d066
                 /* permute the src vec to the dest format */
24d066
+
24d066
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
24d066
+            /* reorder bytes for PowerPC little endian */
24d066
+            vpermute = reorder_ppc64le_vec(vpermute);
24d066
+#endif
24d066
+
24d066
                 vs = vec_perm(vs, valpha, vpermute);
24d066
                 /* load the destination vec */
24d066
                 vd = vec_ld(0, dstp);
24d066
@@ -704,6 +736,12 @@ static void ConvertAltivec32to32_noprefe
24d066
             src += 4;
24d066
             width -= 4;
24d066
             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
24d066
+
24d066
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
24d066
+            /* reorder bytes for PowerPC little endian */
24d066
+            vpermute = reorder_ppc64le_vec(vpermute);
24d066
+#endif
24d066
+
24d066
             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
24d066
             vec_st(vbits, 0, dst);  /* store it back out. */
24d066
             dst += 4;
24d066
@@ -786,6 +824,12 @@ static void ConvertAltivec32to32_prefetc
24d066
             src += 4;
24d066
             width -= 4;
24d066
             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
24d066
+
24d066
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN) 
24d066
+            /* reorder bytes for PowerPC little endian */
24d066
+            vpermute = reorder_ppc64le_vec(vpermute);
24d066
+#endif
24d066
+
24d066
             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
24d066
             vec_st(vbits, 0, dst);  /* store it back out. */
24d066
             dst += 4;