e2eb78
Correct vec_perm() application on little-endian 64-bit PowerPC
e2eb78
e2eb78
The LE transformation for vec_perm has an implicit assumption that the
e2eb78
permutation is being used to reorder vector elements (in this case 4-byte
e2eb78
integer word elements), not to reorder bytes within those elements.  Although
e2eb78
this is legal behavior, it is not anticipated by the transformation performed
e2eb78
by the compilers.
e2eb78
e2eb78
This causes a pygame-1.9.1 test failure on PPC64LE because blitted pixmaps are
e2eb78
corrupted there due to how SDL uses vec_perm().
e2eb78
e2eb78
<https://bugzilla.redhat.com/show_bug.cgi?id=1392465>
e2eb78
e2eb78
--- SDL-1.2.15/src/video/SDL_blit_N.c.ori	2017-09-04 05:56:17.759347525 -0400
e2eb78
+++ SDL-1.2.15/src/video/SDL_blit_N.c	2017-09-06 05:36:20.570789610 -0400
e2eb78
@@ -146,6 +146,32 @@ static vector unsigned char calc_swizzle
e2eb78
     return(vswiz);
e2eb78
 }
e2eb78
 
e2eb78
+/* Reorder the bytes of a vec_perm control vector for PowerPC little endian */
e2eb78
+static vector unsigned char reorder_ppc64le_vec(vector unsigned char vpermute)
e2eb78
+{
e2eb78
+    /* The result vector of calc_swizzle32 reorders bytes using vec_perm.
e2eb78
+       The LE transformation for vec_perm has an implicit assumption
e2eb78
+       that the permutation is being used to reorder vector elements,
e2eb78
+       not to reorder bytes within those elements.
e2eb78
+       Unfortunately the result order is not the expected one for powerpc
e2eb78
+       little endian when the two first vector parameters of vec_perm are
e2eb78
+       not of type 'vector char'. This is because the numbering from the
e2eb78
+       left for BE, and numbering from the right for LE, produces a
e2eb78
+       different interpretation of what the odd and even lanes are.
e2eb78
+       Refer to https://bugzilla.redhat.com/show_bug.cgi?id=1392465
e2eb78
+     */
e2eb78
+
e2eb78
+    const vector unsigned char ppc64le_reorder = VECUINT8_LITERAL(
e2eb78
+                                      0x01, 0x00, 0x03, 0x02,
e2eb78
+                                      0x05, 0x04, 0x07, 0x06,
e2eb78
+                                      0x09, 0x08, 0x0B, 0x0A,
e2eb78
+                                      0x0D, 0x0C, 0x0F, 0x0E );
e2eb78
+
e2eb78
+    vector unsigned char vswiz_ppc64le;
e2eb78
+    vswiz_ppc64le = vec_perm(vpermute, vpermute, ppc64le_reorder); /* select bytes 1,0,3,2,... of vpermute */
e2eb78
+    return(vswiz_ppc64le);
e2eb78
+}
e2eb78
+
e2eb78
 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
e2eb78
 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
e2eb78
     int height = info->d_height;
e2eb78
@@ -631,6 +657,12 @@ static void Blit32to32KeyAltivec(SDL_Bli
e2eb78
                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
e2eb78
                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
e2eb78
                 /* permute the src vec to the dest format */
e2eb78
+
e2eb78
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
e2eb78
+            /* reorder bytes for PowerPC little endian */
e2eb78
+            vpermute = reorder_ppc64le_vec(vpermute);
e2eb78
+#endif
e2eb78
+
e2eb78
                 vs = vec_perm(vs, valpha, vpermute);
e2eb78
                 /* load the destination vec */
e2eb78
                 vd = vec_ld(0, dstp);
e2eb78
@@ -704,6 +736,12 @@ static void ConvertAltivec32to32_noprefe
e2eb78
             src += 4;
e2eb78
             width -= 4;
e2eb78
             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
e2eb78
+
e2eb78
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
e2eb78
+            /* reorder bytes for PowerPC little endian */
e2eb78
+            vpermute = reorder_ppc64le_vec(vpermute);
e2eb78
+#endif
e2eb78
+
e2eb78
             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
e2eb78
             vec_st(vbits, 0, dst);  /* store it back out. */
e2eb78
             dst += 4;
e2eb78
@@ -786,6 +824,12 @@ static void ConvertAltivec32to32_prefetc
e2eb78
             src += 4;
e2eb78
             width -= 4;
e2eb78
             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
e2eb78
+
e2eb78
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN) 
e2eb78
+            /* reorder bytes for PowerPC little endian */
e2eb78
+            vpermute = reorder_ppc64le_vec(vpermute);
e2eb78
+#endif
e2eb78
+
e2eb78
             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
e2eb78
             vec_st(vbits, 0, dst);  /* store it back out. */
e2eb78
             dst += 4;