Patch downloaded from
http://bugs.gentoo.org/show_bug.cgi?id=121871
http://bugs.gentoo.org/attachment.cgi?id=98094
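Summary: this patch removes the text relocations (TEXTRELs) from the libdv
i386 assembly so the shared library is PIC-clean.  Three techniques are
used, all visible in the hunks below:
  * a new libdv/asm_common.S provides MUNG()/MUNG_ARR() macros that turn
    absolute data references into sym@GOTOFF(%ebp) accesses, plus a
    LOAD_PIC_REG() macro that sets up the PIC register through a standard
    __i686.get_pc_thunk trampoline;
  * lookup tables the assembly used to reference by absolute address
    (dv_quant_offset, dv_quant_shifts, vlc_encode_lookup,
    vlc_num_bits_lookup) are instead passed in as extra arguments by the
    C callers;
  * small constants (ALLONE, VLCADDMASK) are built on the stack instead of
    being loaded from .data.

A rough sketch of the addressing change (illustration only, not part of
the patch; "my_table" is a made-up symbol):

	/* non-PIC: absolute address, fixed up at load time -> TEXTREL */
	pmulhw	my_table, %mm0

	/* PIC: fetch the GOT base once, then address the data @GOTOFF */
	call	__i686.get_pc_thunk.bp	/* movl (%esp), %ebp; ret */
	addl	$_GLOBAL_OFFSET_TABLE_, %ebp
	pmulhw	my_table@GOTOFF(%ebp), %mm0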
--- libdv-0.104-old/libdv/asm_common.S
+++ libdv-0.104/libdv/asm_common.S
@@ -0,0 +1,29 @@
+/* public domain, do what you want */
+
+#ifdef __PIC__
+# define MUNG(sym)                 sym##@GOTOFF(%ebp)
+# define MUNG_ARR(sym, args...)    sym##@GOTOFF(%ebp,##args)
+#else
+# define MUNG(sym)                 sym
+# define MUNG_ARR(sym, args...)    sym(,##args)
+#endif
+
+#ifdef __PIC__
+# undef __i686 /* gcc define gets in our way */
+# define LOAD_PIC_REG(reg) \
+	.ifndef  __i686.get_pc_thunk.reg; \
+	.section .gnu.linkonce.t.__i686.get_pc_thunk.reg,"ax",@progbits; \
+	.global  __i686.get_pc_thunk.reg; \
+	.hidden  __i686.get_pc_thunk.reg; \
+	.type    __i686.get_pc_thunk.reg,@function; \
+	__i686.get_pc_thunk.reg: \
+	movl (%esp), %e##reg; \
+	ret; \
+	.size __i686.get_pc_thunk.reg,.-__i686.get_pc_thunk.reg; \
+	.previous; \
+	.endif; \
+	call __i686.get_pc_thunk.reg; \
+	addl $_GLOBAL_OFFSET_TABLE_, %e##reg
+#else
+# define LOAD_PIC_REG(reg)
+#endif
--- libdv-0.104-old/libdv/dct_block_mmx.S
+++ libdv-0.104/libdv/dct_block_mmx.S
@@ -55,19 +55,22 @@ scratch2:       .quad 0
 
 .section .note.GNU-stack, "", @progbits
 
+#include "asm_common.S"
+
 .text
 
 .align 8	
 .global _dv_dct_88_block_mmx
 .hidden _dv_dct_88_block_mmx
 .type   _dv_dct_88_block_mmx,@function
 _dv_dct_88_block_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 
-	movl    8(%ebp), %esi          # source
+	LOAD_PIC_REG(bp)
+
+	movl    12(%esp), %esi          # source
 
 # column 0
 	movq 16*0(%esi), %mm0          # v0
@@ -88,22 +93,22 @@ _dv_dct_88_block_mmx:
 
 	movq 16*3(%esi), %mm5          # v3
 	movq 16*4(%esi), %mm7          # v4
-	movq  %mm7, scratch1           # scratch1: v4   ; 
+	movq  %mm7, MUNG(scratch1)     # scratch1: v4   ; 
 	movq  %mm5, %mm7               # duplicate v3 
-	paddw scratch1, %mm5           # v03: v3+v4  
-	psubw scratch1, %mm7           # v04: v3-v4  
-	movq  %mm5, scratch2           # scratch2: v03
+	paddw MUNG(scratch1), %mm5     # v03: v3+v4  
+	psubw MUNG(scratch1), %mm7     # v04: v3-v4  
+	movq  %mm5, MUNG(scratch2)     # scratch2: v03
 	movq  %mm0, %mm5               # mm5: v00
 
-	paddw scratch2, %mm0           # v10: v00+v03   
-	psubw scratch2, %mm5           # v13: v00-v03   
-	movq  %mm3, scratch3           # scratch3: v02
+	paddw MUNG(scratch2), %mm0     # v10: v00+v03   
+	psubw MUNG(scratch2), %mm5     # v13: v00-v03   
+	movq  %mm3, MUNG(scratch3)     # scratch3: v02
 	movq  %mm1, %mm3               # duplicate v01
 
-	paddw scratch3, %mm1          # v11: v01+v02
-	psubw scratch3, %mm3          # v12: v01-v02
+	paddw MUNG(scratch3), %mm1    # v11: v01+v02
+	psubw MUNG(scratch3), %mm3    # v12: v01-v02
 
-	movq  %mm6, scratch4           # scratch4: v05
+	movq  %mm6, MUNG(scratch4)     # scratch4: v05
 	movq  %mm0, %mm6               # duplicate v10
 
 	paddw %mm1, %mm0              # v10+v11
@@ -113,10 +118,10 @@ _dv_dct_88_block_mmx:
 	movq  %mm6, 16*4(%esi)         # out4: v10-v11 
 
 	movq  %mm4, %mm0               # mm0: v06
-	paddw scratch4, %mm4          # v15: v05+v06 
+	paddw MUNG(scratch4), %mm4    # v15: v05+v06 
 	paddw  %mm2, %mm0             # v16: v07+v06
 
-	pmulhw WA3, %mm4               # v35~: WA3*v15
+	pmulhw MUNG(WA3), %mm4         # v35~: WA3*v15
 	psllw  $1, %mm4                # v35: compensate the coeefient scale
 
 	movq   %mm4, %mm6              # duplicate v35
@@ -125,7 +130,7 @@ _dv_dct_88_block_mmx:
 
 	paddw  %mm5, %mm3             # v22: v12+v13
 
-	pmulhw WA1, %mm3               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm3         # v32~: WA1*v22
 	psllw  $16-NSHIFT, %mm3        # v32: compensate the coeefient scale
 	movq   %mm5, %mm6              # duplicate v13
 
@@ -136,13 +141,13 @@ _dv_dct_88_block_mmx:
 	movq  %mm6, 16*6(%esi)         # out6: v13-v32 
 
 
-	paddw  scratch4, %mm7         # v14n: v04+v05
+	paddw  MUNG(scratch4), %mm7   # v14n: v04+v05
 	movq   %mm0, %mm5              # duplicate v16
 
 	psubw  %mm7, %mm0             # va1: v16-v14n
-	pmulhw WA5, %mm0               # va0~:  va1*WA5
-	pmulhw WA4, %mm5               # v36~~: v16*WA4
-	pmulhw WA2, %mm7               # v34~~: v14n*WA2
+	pmulhw MUNG(WA5), %mm0         # va0~:  va1*WA5
+	pmulhw MUNG(WA4), %mm5         # v36~~: v16*WA4
+	pmulhw MUNG(WA2), %mm7         # v34~~: v14n*WA2
 	psllw  $16-WA4_SHIFT, %mm5     # v36: compensate the coeefient scale 
 	psllw  $16-NSHIFT, %mm7        # v34: compensate the coeefient scale
 
@@ -190,22 +195,22 @@ _dv_dct_88_block_mmx:
 
 	movq 16*3(%esi), %mm5              # v3
 	movq 16*4(%esi), %mm7              # v4
-	movq  %mm7, scratch1                    # scratch1: v4   ; 
+	movq  %mm7, MUNG(scratch1)     # scratch1: v4   ; 
 	movq  %mm5, %mm7               # duplicate v3 
-	paddw scratch1, %mm5           # v03: v3+v4  
-	psubw scratch1, %mm7           # v04: v3-v4  
-	movq  %mm5, scratch2        # scratch2: v03
+	paddw MUNG(scratch1), %mm5     # v03: v3+v4  
+	psubw MUNG(scratch1), %mm7     # v04: v3-v4  
+	movq  %mm5, MUNG(scratch2)     # scratch2: v03
 	movq  %mm0, %mm5               # mm5: v00
 
-	paddw scratch2, %mm0           # v10: v00+v03   
-	psubw scratch2, %mm5           # v13: v00-v03   
-	movq  %mm3, scratch3         # scratc3: v02
+	paddw MUNG(scratch2), %mm0     # v10: v00+v03   
+	psubw MUNG(scratch2), %mm5     # v13: v00-v03   
+	movq  %mm3, MUNG(scratch3)     # scratc3: v02
 	movq  %mm1, %mm3               # duplicate v01
 
-	paddw scratch3, %mm1           # v11: v01+v02
-	psubw scratch3, %mm3           # v12: v01-v02
+	paddw MUNG(scratch3), %mm1     # v11: v01+v02
+	psubw MUNG(scratch3), %mm3     # v12: v01-v02
 
-	movq  %mm6, scratch4         # scratc4: v05
+	movq  %mm6, MUNG(scratch4)     # scratc4: v05
 	movq  %mm0, %mm6               # duplicate v10
 
 	paddw %mm1, %mm0                            # v10+v11
@@ -215,10 +220,10 @@ _dv_dct_88_block_mmx:
 	movq  %mm6, 16*4(%esi)          # out4: v10-v11 
 
 	movq  %mm4, %mm0             # mm0: v06
-	paddw scratch4, %mm4         # v15: v05+v06 
+	paddw MUNG(scratch4), %mm4     # v15: v05+v06 
 	paddw  %mm2, %mm0                       # v16: v07+v06
 
-	pmulhw WA3, %mm4           # v35~: WA3*v15
+	pmulhw MUNG(WA3), %mm4         # v35~: WA3*v15
 	psllw  $16-NSHIFT, %mm4       # v35: compensate the coeefient scale
 
 	movq   %mm4, %mm6            # duplicate v35
@@ -227,7 +232,7 @@ _dv_dct_88_block_mmx:
 
 	paddw  %mm5, %mm3            # v22: v12+v13
 
-	pmulhw WA1, %mm3           # v32~: WA3*v15
+	pmulhw MUNG(WA1), %mm3         # v32~: WA3*v15
 	psllw  $16-NSHIFT, %mm3       # v32: compensate the coeefient scale
 	movq   %mm5, %mm6            # duplicate v13
 
@@ -237,13 +242,13 @@ _dv_dct_88_block_mmx:
 	movq  %mm5, 16*2(%esi)          # out2: v13+v32 
 	movq  %mm6, 16*6(%esi)          # out6: v13-v32 
 
-	paddw  scratch4, %mm7                           # v14n: v04+v05
+	paddw  MUNG(scratch4), %mm7     # v14n: v04+v05
 	movq   %mm0, %mm5                               # duplicate v16
 
 	psubw  %mm7, %mm0                               # va1: v16-v14n
-	pmulhw WA2, %mm7                # v34~~: v14n*WA2
-	pmulhw WA5, %mm0                # va0~:  va1*WA5
-	pmulhw WA4, %mm5                        # v36~~: v16*WA4
+	pmulhw MUNG(WA2), %mm7          # v34~~: v14n*WA2
+	pmulhw MUNG(WA5), %mm0          # va0~:  va1*WA5
+	pmulhw MUNG(WA4), %mm5          # v36~~: v16*WA4
 	psllw  $16-NSHIFT, %mm7
 	psllw  $16-WA4_SHIFT, %mm5      # v36: compensate the coeffient 
 		# scale note that WA4 is shifted 1 bit less than the others
@@ -755,11 +762,12 @@ _dv_dct_block_mmx_postscale_88:
 _dv_dct_248_block_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl   %edi
 
-	movl    8(%ebp), %esi          # source
+	LOAD_PIC_REG(bp)
+
+	movl    16(%esp), %esi          # source
 
 # column 0
 
@@ -781,7 +791,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
@@ -820,7 +830,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
@@ -857,7 +867,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
@@ -894,7 +904,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
--- libdv-0.104-old/libdv/dv.c
+++ libdv-0.104/libdv/dv.c
@@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp
 } /* dv_reconfigure */
 
 
+extern uint8_t dv_quant_offset[4];
+extern uint8_t dv_quant_shifts[22][4];
+
 static inline void 
 dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
   int i;
@@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d
       dv_idct_248 (co248, mb->b[i].coeffs);
     } else {
 #if ARCH_X86
-      _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
+      _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
       _dv_idct_88(mb->b[i].coeffs);
 #elif ARCH_X86_64
       _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
@@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv
 	dv_idct_248 (co248, mb->b[b].coeffs);
       } else {
 #if ARCH_X86
-	_dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
+	_dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
 	_dv_weight_88_inverse(bl->coeffs);
 	_dv_idct_88(bl->coeffs);
 #elif ARCH_X86_64
--- libdv-0.104-old/libdv/encode.c
+++ libdv-0.104/libdv/encode.c
@@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl
 }
 
 extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
-					  dv_vlc_entry_t ** out);
+					  dv_vlc_entry_t ** out,
+					  dv_vlc_entry_t * lookup);
 
 extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
 					  dv_vlc_entry_t ** out);
@@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv
 #elif ARCH_X86
 	int num_bits;
 
-	num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
+	num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
 	emms();
 #else
 	int num_bits;
@@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv
 	return num_bits;
 }
 
-extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
+extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
 extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
 
 extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
@@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl
 #elif ARCH_X86_64
 	return _dv_vlc_num_bits_block_x86_64(coeffs);
 #else
-	return _dv_vlc_num_bits_block_x86(coeffs);
+	return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
 #endif
 }
 
--- libdv-0.104-old/libdv/encode_x86.S
+++ libdv-0.104/libdv/encode_x86.S
@@ -23,9 +23,6 @@
 *  The libdv homepage is http://libdv.sourceforge.net/.  
 */
 
-.data
-ALLONE:		.word 1,1,1,1
-VLCADDMASK:	.byte 255,0,0,0,255,0,0,0
 		
 
 .section .note.GNU-stack, "", @progbits
@@ -49,11 +47,14 @@ _dv_vlc_encode_block_mmx:	
 
 	movl	$63, %ecx
 
-	movl	vlc_encode_lookup, %esi
+	movl	4+4*4+8(%esp), %esi              # vlc_encode_lookup
 
 	pxor	%mm0, %mm0
 	pxor	%mm2, %mm2
-	movq	VLCADDMASK, %mm1
+	pushl	$0x000000FF                      # these four lines
+	pushl	$0x000000FF                      # load VLCADDMASK
+	movq	(%esp), %mm1                     # into %mm1 off the stack
+	addl	$8, %esp                         #  --> no TEXTRELs
 	xorl	%ebp, %ebp
 	subl	$8, %edx
 vlc_encode_block_mmx_loop:
@@ -125,7 +128,7 @@ _dv_vlc_num_bits_block_x86:	
 	addl	$2, %edi
 
 	movl	$63, %ecx
-	movl	vlc_num_bits_lookup, %esi
+	movl	4+4*4+4(%esp), %esi              # vlc_num_bits_lookup
 	
 vlc_num_bits_block_x86_loop:
 	movw	(%edi), %ax
@@ -583,8 +594,11 @@ _dv_need_dct_248_mmx_rows:
 	paddw	%mm5, %mm1
 
 	paddw	%mm1, %mm0
-	
-	pmaddwd	ALLONE, %mm0	
+
+	pushl	$0x00010001              # these four lines
+	pushl	$0x00010001              # load ALLONE
+	pmaddwd	(%esp), %mm0             # into %mm0 off the stack
+	addl	$8, %esp                 #  --> no TEXTRELs
 	movq	%mm0, %mm1
 	psrlq	$32, %mm1
 	paddd	%mm1, %mm0
--- libdv-0.104-old/libdv/idct_block_mmx.S
+++ libdv-0.104/libdv/idct_block_mmx.S
@@ -8,17 +8,21 @@
 
 .section .note.GNU-stack, "", @progbits
 
+#include "asm_common.S"
+
 .text
 	.align 4
 .global _dv_idct_block_mmx
 .hidden _dv_idct_block_mmx
 .type   _dv_idct_block_mmx,@function
 _dv_idct_block_mmx:
 	pushl	 %ebp
-	movl	 %esp,%ebp
 	pushl	 %esi
-	leal	 preSC, %ecx
-	movl	 8(%ebp),%esi		/* source matrix */
+
+	LOAD_PIC_REG(bp)
+
+	leal	 MUNG(preSC), %ecx
+	movl	 12(%esp),%esi		/* source matrix */
 
 /* 
 *	column 0: even part
@@ -34,7 +40,7 @@ _dv_idct_block_mmx:
 	movq %mm1, %mm2			/* added 11/1/96 */
 	pmulhw 8*8(%esi),%mm5		/* V8 */
 	psubsw %mm0, %mm1		/* V16 */
-	pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V18 */
+	pmulhw MUNG(x5a825a825a825a82), %mm1	/* 23170 ->V18 */
 	paddsw %mm0, %mm2		/* V17 */
 	movq %mm2, %mm0			/* duplicate V17 */
 	psraw $1, %mm2			/* t75=t82 */
@@ -75,7 +81,7 @@ _dv_idct_block_mmx:
 	paddsw %mm0, %mm3		/* V29 ; free mm0 */
 	movq %mm7, %mm1			/* duplicate V26 */
 	psraw $1, %mm3			/* t91=t94 */
-	pmulhw x539f539f539f539f,%mm7	/* V33 */
+	pmulhw MUNG(x539f539f539f539f),%mm7	/* V33 */
 	psraw $1, %mm1			/* t96 */
 	movq %mm5, %mm0			/* duplicate V2 */
 	psraw $2, %mm4			/* t85=t87 */
@@ -83,15 +89,15 @@ _dv_idct_block_mmx:
 	psubsw %mm4, %mm0		/* V28 ; free mm4 */
 	movq %mm0, %mm2			/* duplicate V28 */
 	psraw $1, %mm5			/* t90=t93 */
-	pmulhw x4546454645464546,%mm0	/* V35 */
+	pmulhw MUNG(x4546454645464546),%mm0	/* V35 */
 	psraw $1, %mm2			/* t97 */
 	movq %mm5, %mm4			/* duplicate t90=t93 */
 	psubsw %mm2, %mm1		/* V32 ; free mm2 */
-	pmulhw x61f861f861f861f8,%mm1	/* V36 */
+	pmulhw MUNG(x61f861f861f861f8),%mm1	/* V36 */
 	psllw $1, %mm7			/* t107 */
 	paddsw %mm3, %mm5		/* V31 */
 	psubsw %mm3, %mm4		/* V30 ; free mm3 */
-	pmulhw x5a825a825a825a82,%mm4	/* V34 */
+	pmulhw MUNG(x5a825a825a825a82),%mm4	/* V34 */
 	nop
 	psubsw %mm1, %mm0		/* V38 */
 	psubsw %mm7, %mm1		/* V37 ; free mm7 */
@@ -158,7 +164,7 @@ _dv_idct_block_mmx:
 	psubsw %mm7, %mm1		/* V50 */
 	pmulhw 8*9(%esi), %mm5		/* V9 */
 	paddsw %mm7, %mm2		/* V51 */
-	pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V52 */
+	pmulhw MUNG(x5a825a825a825a82), %mm1	/* 23170 ->V52 */
 	movq %mm2, %mm6			/* duplicate V51 */
 	psraw $1, %mm2			/* t138=t144 */
 	movq %mm3, %mm4			/* duplicate V1 */
@@ -199,11 +205,11 @@ _dv_idct_block_mmx:
 * even more by doing the correction step in a later stage when the number
 * is actually multiplied by 16
 */
-	paddw x0005000200010001, %mm4
+	paddw MUNG(x0005000200010001), %mm4
 	psubsw %mm6, %mm3		/* V60 ; free mm6 */
 	psraw $1, %mm0			/* t154=t156 */
 	movq %mm3, %mm1			/* duplicate V60 */
-	pmulhw x539f539f539f539f, %mm1	/* V67 */
+	pmulhw MUNG(x539f539f539f539f), %mm1	/* V67 */
 	movq %mm5, %mm6			/* duplicate V3 */
 	psraw $2, %mm4			/* t148=t150 */
 	paddsw %mm4, %mm5		/* V61 */
@@ -212,13 +218,13 @@ _dv_idct_block_mmx:
 	psllw $1, %mm1			/* t169 */
 	paddsw %mm0, %mm5		/* V65 -> result */
 	psubsw %mm0, %mm4		/* V64 ; free mm0 */
-	pmulhw x5a825a825a825a82, %mm4	/* V68 */
+	pmulhw MUNG(x5a825a825a825a82), %mm4	/* V68 */
 	psraw $1, %mm3			/* t158 */
 	psubsw %mm6, %mm3		/* V66 */
 	movq %mm5, %mm2			/* duplicate V65 */
-	pmulhw x61f861f861f861f8, %mm3	/* V70 */
+	pmulhw MUNG(x61f861f861f861f8), %mm3	/* V70 */
 	psllw $1, %mm6			/* t165 */
-	pmulhw x4546454645464546, %mm6	/* V69 */
+	pmulhw MUNG(x4546454645464546), %mm6	/* V69 */
 	psraw $1, %mm2			/* t172 */
/* moved from next block */
 	movq 8*5(%esi), %mm0		/* V56 */
@@ -343,7 +349,7 @@ _dv_idct_block_mmx:
 *	movq 8*13(%esi), %mm4		tmt13
 */
 	psubsw %mm4, %mm3		/* V134 */
-	pmulhw x5a825a825a825a82, %mm3	/* 23170 ->V136 */
+	pmulhw MUNG(x5a825a825a825a82), %mm3	/* 23170 ->V136 */
 	movq 8*9(%esi), %mm6		/* tmt9 */
 	paddsw %mm4, %mm5		/* V135 ; mm4 free */
 	movq %mm0, %mm4			/* duplicate tmt1 */
@@ -372,17 +378,17 @@ _dv_idct_block_mmx:
 	psubsw %mm7, %mm0		/* V144 */
 	movq %mm0, %mm3			/* duplicate V144 */
 	paddsw %mm7, %mm2		/* V147 ; free mm7 */
-	pmulhw x539f539f539f539f, %mm0	/* 21407-> V151 */
+	pmulhw MUNG(x539f539f539f539f), %mm0	/* 21407-> V151 */
 	movq %mm1, %mm7			/* duplicate tmt3 */
 	paddsw %mm5, %mm7		/* V145 */
 	psubsw %mm5, %mm1		/* V146 ; free mm5 */
 	psubsw %mm1, %mm3		/* V150 */
 	movq %mm7, %mm5			/* duplicate V145 */
-	pmulhw x4546454645464546, %mm1	/* 17734-> V153 */
+	pmulhw MUNG(x4546454645464546), %mm1	/* 17734-> V153 */
 	psubsw %mm2, %mm5		/* V148 */
-	pmulhw x61f861f861f861f8, %mm3	/* 25080-> V154 */
+	pmulhw MUNG(x61f861f861f861f8), %mm3	/* 25080-> V154 */
 	psllw $2, %mm0			/* t311 */
-	pmulhw x5a825a825a825a82, %mm5	/* 23170-> V152 */
+	pmulhw MUNG(x5a825a825a825a82), %mm5	/* 23170-> V152 */
 	paddsw %mm2, %mm7		/* V149 ; free mm2 */
 	psllw $1, %mm1			/* t313 */
 	nop	/* without the nop - freeze here for one clock */
@@ -408,7 +414,7 @@ _dv_idct_block_mmx:
 	paddsw %mm3, %mm6		/* V164 ; free mm3 */
 	movq %mm4, %mm3			/* duplicate V142 */
 	psubsw %mm5, %mm4		/* V165 ; free mm5 */
-	movq %mm2, scratch7		/* out7 */
+	movq %mm2, MUNG(scratch7)		/* out7 */
 	psraw $4, %mm6
 	psraw $4, %mm4
 	paddsw %mm5, %mm3		/* V162 */
@@ -419,11 +425,11 @@ _dv_idct_block_mmx:
 */
 	movq %mm6, 8*9(%esi)		/* out9 */
 	paddsw %mm1, %mm0		/* V161 */
-	movq %mm3, scratch5		/* out5 */
+	movq %mm3, MUNG(scratch5)		/* out5 */
 	psubsw %mm1, %mm5		/* V166 ; free mm1 */
 	movq %mm4, 8*11(%esi)		/* out11 */
 	psraw $4, %mm5
-	movq %mm0, scratch3		/* out3 */
+	movq %mm0, MUNG(scratch3)		/* out3 */
 	movq %mm2, %mm4			/* duplicate V140 */
 	movq %mm5, 8*13(%esi)		/* out13 */
 	paddsw %mm7, %mm2		/* V160 */
@@ -433,7 +439,7 @@ _dv_idct_block_mmx:
/* moved from the next block */
 	movq 8*3(%esi), %mm7
 	psraw $4, %mm4
-	movq %mm2, scratch1		/* out1 */
+	movq %mm2, MUNG(scratch1)		/* out1 */
/* moved from the next block */
 	movq %mm0, %mm1
 	movq %mm4, 8*15(%esi)		/* out15 */
@@ -490,15 +496,15 @@ _dv_idct_block_mmx:
 	paddsw %mm4, %mm3		/* V113 ; free mm4 */
 	movq %mm0, %mm4			/* duplicate V110 */
 	paddsw %mm1, %mm2		/* V111 */
-	pmulhw x539f539f539f539f, %mm0	/* 21407-> V117 */
+	pmulhw MUNG(x539f539f539f539f), %mm0	/* 21407-> V117 */
 	psubsw %mm1, %mm5		/* V112 ; free mm1 */
 	psubsw %mm5, %mm4		/* V116 */
 	movq %mm2, %mm1			/* duplicate V111 */
-	pmulhw x4546454645464546, %mm5	/* 17734-> V119 */
+	pmulhw MUNG(x4546454645464546), %mm5	/* 17734-> V119 */
 	psubsw %mm3, %mm2		/* V114 */
-	pmulhw x61f861f861f861f8, %mm4	/* 25080-> V120 */
+	pmulhw MUNG(x61f861f861f861f8), %mm4	/* 25080-> V120 */
 	paddsw %mm3, %mm1		/* V115 ; free mm3 */
-	pmulhw x5a825a825a825a82, %mm2	/* 23170-> V118 */
+	pmulhw MUNG(x5a825a825a825a82), %mm2	/* 23170-> V118 */
 	psllw $2, %mm0			/* t266 */
 	movq %mm1, (%esi)		/* save V115 */
 	psllw $1, %mm5			/* t268 */
@@ -516,7 +522,7 @@ _dv_idct_block_mmx:
 	movq %mm6, %mm3			/* duplicate tmt4 */
 	psubsw %mm0, %mm6		/* V100 */
 	paddsw %mm0, %mm3		/* V101 ; free mm0 */
-	pmulhw x5a825a825a825a82, %mm6	/* 23170 ->V102 */
+	pmulhw MUNG(x5a825a825a825a82), %mm6	/* 23170 ->V102 */
 	movq %mm7, %mm5			/* duplicate tmt0 */
 	movq 8*8(%esi), %mm1		/* tmt8 */
 	paddsw %mm1, %mm7		/* V103 */
@@ -550,10 +556,10 @@ _dv_idct_block_mmx:
 	movq 8*2(%esi), %mm3		/* V123 */
 	paddsw %mm4, %mm7		/* out0 */
/* moved up from next block */
-	movq scratch3, %mm0
+	movq MUNG(scratch3), %mm0
 	psraw $4, %mm7
/* moved up from next block */
-	movq scratch5, %mm6 
+	movq MUNG(scratch5), %mm6 
 	psubsw %mm4, %mm1		/* out14 ; free mm4 */
 	paddsw %mm3, %mm5		/* out2 */
 	psraw $4, %mm1
@@ -564,7 +570,7 @@ _dv_idct_block_mmx:
 	movq %mm5, 8*2(%esi)		/* out2 ; free mm5 */
 	psraw $4, %mm2
/* moved up to the prev block */
-	movq scratch7, %mm4
+	movq MUNG(scratch7), %mm4
/* moved up to the prev block */
 	psraw $4, %mm0
 	movq %mm2, 8*12(%esi)		/* out12 ; free mm2 */
@@ -578,7 +584,7 @@ _dv_idct_block_mmx:
 *	psraw $4, %mm0
 *	psraw $4, %mm6
*/
-	movq scratch1, %mm1
+	movq MUNG(scratch1), %mm1
 	psraw $4, %mm4
 	movq %mm0, 8*3(%esi)		/* out3 */
 	psraw $4, %mm1
--- libdv-0.104-old/libdv/parse.c
+++ libdv-0.104/libdv/parse.c
@@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se
   exit(0);
 #endif
 } /* dv_parse_ac_coeffs */
+#if defined __GNUC__ && __ELF__
+# define dv_strong_hidden_alias(name, aliasname) \
+    extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden")))
+dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs);
+#else
+int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); }
+#endif
 
 /* ---------------------------------------------------------------------------
 */
--- libdv-0.104-old/libdv/quant.c
+++ libdv-0.104/libdv/quant.c
@@ -144,7 +144,7 @@ uint8_t  dv_quant_offset[4] = { 6,3,0,1 
 uint32_t	dv_quant_248_mul_tab [2] [22] [64];
 uint32_t dv_quant_88_mul_tab [2] [22] [64];
 
-extern void             _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
+extern void             _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t *dv_quant_offset,uint8_t *dv_quant_shifts);
 extern void             _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
 static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
 static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
@@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno
 		_dv_quant_x86_64(block, qno, klass);
 		emms();
 #else
-		_dv_quant_x86(block, qno, klass);
+		_dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
 		emms();
 #endif
 	}
--- libdv-0.104-old/libdv/quant.h
+++ libdv-0.104/libdv/quant.h
@@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block,
 extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
 extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
                                   dv_248_coeff_t *co);
-extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
+extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t *offset, uint8_t *shifts);
 extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
 extern void dv_quant_init (void);
 #ifdef __cplusplus
--- libdv-0.104-old/libdv/quant_x86.S
+++ libdv-0.104/libdv/quant_x86.S
@@ -73,10 +75,13 @@ _dv_quant_88_inverse_x86:	
 	
 	/*  pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
 	movl	ARGn(1),%eax	/* qno */
+	movl	ARGn(3),%ebx	/* dv_quant_offset */
+	addl	ARGn(2),%ebx	/* class */
+	movzbl	(%ebx),%ecx
 	movl	ARGn(2),%ebx	/* class */
-	movzbl	dv_quant_offset(%ebx),%ecx
 	addl	%ecx,%eax
-	leal	dv_quant_shifts(,%eax,4),%edx	/* edx is pq */
+	movl	ARGn(4),%edx	/* dv_quant_shifts */
+	leal	(%edx,%eax,4),%edx	/* edx is pq */
 
 	/* extra = (class == 3); */
 				/*  0   1   2   3 */
@@ -214,11 +221,13 @@ _dv_quant_x86:	
 	
 	/*  pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
 	movl	ARGn(1),%eax	/* qno */
+	movl	ARGn(3),%ebx	/* offset */
+	addl	ARGn(2),%ebx	/* class */
+	movzbl	(%ebx),%ecx
 	movl	ARGn(2),%ebx	/* class */
-
-	movzbl	dv_quant_offset(%ebx),%ecx
+	movl	ARGn(4),%edx	/* shifts */
 	addl	%ecx,%eax
-	leal	dv_quant_shifts(,%eax,4),%edx	/* edx is pq */
+	leal	(%edx,%eax,4),%edx	/* edx is pq */
 
 	/* extra = (class == 3); */
 				/*  0   1   2   3 */
--- libdv-0.104-old/libdv/rgbtoyuv.S
+++ libdv-0.104/libdv/rgbtoyuv.S
@@ -41,9 +41,6 @@
 #define DV_WIDTH_SHORT_HALF 720
 #define DV_WIDTH_BYTE_HALF  360	
 		
-.global _dv_rgbtoycb_mmx
-# .global yuvtoycb_mmx
-
 .data
 
 .align 8
@@ -110,25 +107,26 @@ VR0GR:  .long   0,0
 VBG0B:  .long   0,0
 	
 #endif	
-	
+
+#include "asm_common.S"
+
 .section .note.GNU-stack, "", @progbits
 
 .text
 
-#define _inPtr     8
-#define _rows      12
-#define _columns   16
-#define _outyPtr   20
-#define _outuPtr   24
-#define _outvPtr   28
+#define _inPtr     24+8
+#define _rows      24+12
+#define _columns   24+16
+#define _outyPtr   24+20
+#define _outuPtr   24+24
+#define _outvPtr   24+28
 
 .global _dv_rgbtoycb_mmx
 .hidden _dv_rgbtoycb_mmx
 .type   _dv_rgbtoycb_mmx,@function
 _dv_rgbtoycb_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %eax
 	pushl   %ebx
 	pushl   %ecx
@@ -133,46 +134,47 @@ _dv_rgbtoycb_mmx:
 	pushl   %esi
 	pushl   %edi
 
-	leal    ZEROSX, %eax    #This section gets around a bug
+	LOAD_PIC_REG(bp)
+
+	leal    MUNG(ZEROSX), %eax    #This section gets around a bug
 	movq    (%eax), %mm0    #unlikely to persist
-	movq    %mm0, ZEROS
-	leal    OFFSETDX, %eax
+	movq    %mm0, MUNG(ZEROS)
+	leal    MUNG(OFFSETDX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, OFFSETD
-	leal    OFFSETWX, %eax
+	movq    %mm0, MUNG(OFFSETD)
+	leal    MUNG(OFFSETWX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, OFFSETW
-	leal    OFFSETBX, %eax
+	movq    %mm0, MUNG(OFFSETW)
+	leal    MUNG(OFFSETBX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, OFFSETB
-	leal    YR0GRX, %eax
+	movq    %mm0, MUNG(OFFSETB)
+	leal    MUNG(YR0GRX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, YR0GR
-	leal    YBG0BX, %eax
+	movq    %mm0, MUNG(YR0GR)
+	leal    MUNG(YBG0BX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, YBG0B
-	leal    UR0GRX, %eax
+	movq    %mm0, MUNG(YBG0B)
+	leal    MUNG(UR0GRX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, UR0GR
-	leal    UBG0BX, %eax
+	movq    %mm0, MUNG(UR0GR)
+	leal    MUNG(UBG0BX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, UBG0B
-	leal    VR0GRX, %eax
+	movq    %mm0, MUNG(UBG0B)
+	leal    MUNG(VR0GRX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, VR0GR
-	leal    VBG0BX, %eax
+	movq    %mm0, MUNG(VR0GR)
+	leal    MUNG(VBG0BX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, VBG0B
-	
-	movl    _rows(%ebp), %eax
-	movl    _columns(%ebp), %ebx
+	movq    %mm0, MUNG(VBG0B)
+	movl    _rows(%esp), %eax
+	movl    _columns(%esp), %ebx
 	mull    %ebx            #number pixels
 	shrl    $3, %eax        #number of loops
 	movl    %eax, %edi      #loop counter in edi
-	movl    _inPtr(%ebp), %eax
-	movl    _outyPtr(%ebp), %ebx
-	movl    _outuPtr(%ebp), %ecx
-	movl    _outvPtr(%ebp), %edx
+	movl    _inPtr(%esp), %eax
+	movl    _outyPtr(%esp), %ebx
+	movl    _outuPtr(%esp), %ecx
+	movl    _outvPtr(%esp), %edx
 rgbtoycb_mmx_loop: 
 	movq    (%eax), %mm1    #load G2R2B1G1R1B0G0R0
 	pxor    %mm6, %mm6      #0 -> mm6
@@ -186,29 +188,29 @@ rgbtoycb_mmx_loop: 
 	punpcklbw %mm6, %mm1     #B1G1R1B0 -> mm1
 	movq    %mm0, %mm2      #R1B0G0R0 -> mm2
 
-	pmaddwd YR0GR, %mm0     #yrR1,ygG0+yrR0 -> mm0
+	pmaddwd MUNG(YR0GR), %mm0     #yrR1,ygG0+yrR0 -> mm0
 	movq    %mm1, %mm3      #B1G1R1B0 -> mm3
 
-	pmaddwd YBG0B, %mm1     #ybB1+ygG1,ybB0 -> mm1
+	pmaddwd MUNG(YBG0B), %mm1     #ybB1+ygG1,ybB0 -> mm1
 	movq    %mm2, %mm4      #R1B0G0R0 -> mm4
 
-	pmaddwd UR0GR, %mm2     #urR1,ugG0+urR0 -> mm2
+	pmaddwd MUNG(UR0GR), %mm2     #urR1,ugG0+urR0 -> mm2
 	movq    %mm3, %mm5      #B1G1R1B0 -> mm5
 
-	pmaddwd UBG0B, %mm3     #ubB1+ugG1,ubB0 -> mm3
+	pmaddwd MUNG(UBG0B), %mm3     #ubB1+ugG1,ubB0 -> mm3
 	punpckhbw       %mm6, %mm7 #    00G2R2 -> mm7
 
-	pmaddwd VR0GR, %mm4     #vrR1,vgG0+vrR0 -> mm4
+	pmaddwd MUNG(VR0GR), %mm4     #vrR1,vgG0+vrR0 -> mm4
 	paddd   %mm1, %mm0      #Y1Y0 -> mm0
 
-	pmaddwd VBG0B, %mm5     #vbB1+vgG1,vbB0 -> mm5
+	pmaddwd MUNG(VBG0B), %mm5     #vbB1+vgG1,vbB0 -> mm5
 
 	movq    8(%eax), %mm1   #R5B4G4R4B3G3R3B2 -> mm1
 	paddd   %mm3, %mm2      #U1U0 -> mm2
 
 	movq    %mm1, %mm6      #R5B4G4R4B3G3R3B2 -> mm6
 
-	punpcklbw       ZEROS, %mm1     #B3G3R3B2 -> mm1
+	punpcklbw       MUNG(ZEROS), %mm1     #B3G3R3B2 -> mm1
 	paddd   %mm5, %mm4      #V1V0 -> mm4
 
 	movq    %mm1, %mm5      #B3G3R3B2 -> mm5
@@ -216,29 +218,29 @@ rgbtoycb_mmx_loop: 
 
 	paddd   %mm7, %mm1      #R3B200+00G2R2=R3B2G2R2->mm1
 
-	punpckhbw       ZEROS, %mm6     #R5B4G4R3 -> mm6
+	punpckhbw       MUNG(ZEROS), %mm6     #R5B4G4R3 -> mm6
 	movq    %mm1, %mm3      #R3B2G2R2 -> mm3
 
-	pmaddwd YR0GR, %mm1     #yrR3,ygG2+yrR2 -> mm1
+	pmaddwd MUNG(YR0GR), %mm1     #yrR3,ygG2+yrR2 -> mm1
 	movq    %mm5, %mm7      #B3G3R3B2 -> mm7
 
-	pmaddwd YBG0B, %mm5     #ybB3+ygG3,ybB2 -> mm5
+	pmaddwd MUNG(YBG0B), %mm5     #ybB3+ygG3,ybB2 -> mm5
 	psrad   $FIXPSHIFT, %mm0       #32-bit scaled Y1Y0 -> mm0
 
-	movq    %mm6, TEMP0     #R5B4G4R4 -> TEMP0
+	movq    %mm6, MUNG(TEMP0)     #R5B4G4R4 -> TEMP0
 	movq    %mm3, %mm6      #R3B2G2R2 -> mm6
-	pmaddwd UR0GR, %mm6     #urR3,ugG2+urR2 -> mm6
+	pmaddwd MUNG(UR0GR), %mm6     #urR3,ugG2+urR2 -> mm6
 	psrad   $FIXPSHIFT, %mm2       #32-bit scaled U1U0 -> mm2
 
 	paddd   %mm5, %mm1      #Y3Y2 -> mm1
 	movq    %mm7, %mm5      #B3G3R3B2 -> mm5
-	pmaddwd UBG0B, %mm7     #ubB3+ugG3,ubB2
+	pmaddwd MUNG(UBG0B), %mm7     #ubB3+ugG3,ubB2
 	psrad   $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
 
-	pmaddwd VR0GR, %mm3     #vrR3,vgG2+vgR2
+	pmaddwd MUNG(VR0GR), %mm3     #vrR3,vgG2+vgR2
 	packssdw        %mm1, %mm0      #Y3Y2Y1Y0 -> mm0
 
-	pmaddwd VBG0B, %mm5     #vbB3+vgG3,vbB2 -> mm5
+	pmaddwd MUNG(VBG0B), %mm5     #vbB3+vgG3,vbB2 -> mm5
 	psrad   $FIXPSHIFT, %mm4       #32-bit scaled V1V0 -> mm4
 
 	movq    16(%eax), %mm1  #B7G7R7B6G6R6B5G5 -> mm7
@@ -253,58 +255,58 @@ rgbtoycb_mmx_loop: 
 	movq    %mm7, %mm5      #R7B6G6R6B5G500 -> mm5
 	psrad   $FIXPSHIFT, %mm3       #32-bit scaled V3V2 -> mm3
 
-	paddw	OFFSETY, %mm0
+	paddw	MUNG(OFFSETY), %mm0
 	movq    %mm0, (%ebx)     #store Y3Y2Y1Y0 
 	packssdw %mm6, %mm2      #32-bit scaled U3U2U1U0 -> mm2
 
-	movq    TEMP0, %mm0     #R5B4G4R4 -> mm0
+	movq    MUNG(TEMP0), %mm0     #R5B4G4R4 -> mm0
 	addl	$8, %ebx
-	
-	punpcklbw       ZEROS, %mm7     #B5G500 -> mm7
+
+	punpcklbw       MUNG(ZEROS), %mm7     #B5G500 -> mm7
 	movq    %mm0, %mm6      #R5B4G4R4 -> mm6
 
-	movq    %mm2, TEMPU     #32-bit scaled U3U2U1U0 -> TEMPU
+	movq    %mm2, MUNG(TEMPU)     #32-bit scaled U3U2U1U0 -> TEMPU
 	psrlq   $32, %mm0       #00R5B4 -> mm0
 
 	paddw   %mm0, %mm7      #B5G5R5B4 -> mm7
 	movq    %mm6, %mm2      #B5B4G4R4 -> mm2
 
-	pmaddwd YR0GR, %mm2     #yrR5,ygG4+yrR4 -> mm2
+	pmaddwd MUNG(YR0GR), %mm2     #yrR5,ygG4+yrR4 -> mm2
 	movq    %mm7, %mm0      #B5G5R5B4 -> mm0
 
-	pmaddwd YBG0B, %mm7     #ybB5+ygG5,ybB4 -> mm7
+	pmaddwd MUNG(YBG0B), %mm7     #ybB5+ygG5,ybB4 -> mm7
 	packssdw        %mm3, %mm4      #32-bit scaled V3V2V1V0 -> mm4
 
 	addl    $24, %eax       #increment RGB count
 
-	movq    %mm4, TEMPV     #(V3V2V1V0)/256 -> mm4
+	movq    %mm4, MUNG(TEMPV)     #(V3V2V1V0)/256 -> mm4
 	movq    %mm6, %mm4      #B5B4G4R4 -> mm4
 
-	pmaddwd UR0GR, %mm6     #urR5,ugG4+urR4
+	pmaddwd MUNG(UR0GR), %mm6     #urR5,ugG4+urR4
 	movq    %mm0, %mm3      #B5G5R5B4 -> mm0
 
-	pmaddwd UBG0B, %mm0     #ubB5+ugG5,ubB4
+	pmaddwd MUNG(UBG0B), %mm0     #ubB5+ugG5,ubB4
 	paddd   %mm7, %mm2      #Y5Y4 -> mm2
 
-	pmaddwd         VR0GR, %mm4     #vrR5,vgG4+vrR4 -> mm4
+	pmaddwd         MUNG(VR0GR), %mm4     #vrR5,vgG4+vrR4 -> mm4
 	pxor    %mm7, %mm7      #0 -> mm7
 
-	pmaddwd VBG0B, %mm3     #vbB5+vgG5,vbB4 -> mm3
+	pmaddwd MUNG(VBG0B), %mm3     #vbB5+vgG5,vbB4 -> mm3
 	punpckhbw       %mm7, %mm1      #B7G7R7B6 -> mm1
 
 	paddd   %mm6, %mm0      #U5U4 -> mm0
 	movq    %mm1, %mm6      #B7G7R7B6 -> mm6
 
-	pmaddwd YBG0B, %mm6     #ybB7+ygG7,ybB6 -> mm6
+	pmaddwd MUNG(YBG0B), %mm6     #ybB7+ygG7,ybB6 -> mm6
 	punpckhbw       %mm7, %mm5      #R7B6G6R6 -> mm5
 
 	movq    %mm5, %mm7      #R7B6G6R6 -> mm7
 	paddd   %mm4, %mm3      #V5V4 -> mm3
 
-	pmaddwd YR0GR, %mm5     #yrR7,ygG6+yrR6 -> mm5
+	pmaddwd MUNG(YR0GR), %mm5     #yrR7,ygG6+yrR6 -> mm5
 	movq    %mm1, %mm4      #B7G7R7B6 -> mm4
 
-	pmaddwd UBG0B, %mm4     #ubB7+ugG7,ubB6 -> mm4
+	pmaddwd MUNG(UBG0B), %mm4     #ubB7+ugG7,ubB6 -> mm4
 	psrad   $FIXPSHIFT, %mm0       #32-bit scaled U5U4 -> mm0
 
 	psrad   $FIXPSHIFT, %mm2       #32-bit scaled Y5Y4 -> mm2
@@ -312,25 +314,25 @@ rgbtoycb_mmx_loop: 
 	paddd   %mm5, %mm6      #Y7Y6 -> mm6
 	movq    %mm7, %mm5      #R7B6G6R6 -> mm5
 
-	pmaddwd UR0GR, %mm7     #urR7,ugG6+ugR6 -> mm7
+	pmaddwd MUNG(UR0GR), %mm7     #urR7,ugG6+ugR6 -> mm7
 	psrad   $FIXPSHIFT, %mm3       #32-bit scaled V5V4 -> mm3
 
-	pmaddwd VBG0B, %mm1     #vbB7+vgG7,vbB6 -> mm1
+	pmaddwd MUNG(VBG0B), %mm1     #vbB7+vgG7,vbB6 -> mm1
 	psrad   $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
 
 	packssdw %mm6, %mm2     #Y7Y6Y5Y4 -> mm2
 
-	pmaddwd VR0GR, %mm5     #vrR7,vgG6+vrR6 -> mm5
+	pmaddwd MUNG(VR0GR), %mm5     #vrR7,vgG6+vrR6 -> mm5
 	paddd   %mm4, %mm7      #U7U6 -> mm7    
 
 	psrad   $FIXPSHIFT, %mm7       #32-bit scaled U7U6 -> mm7
-	paddw	OFFSETY, %mm2
+	paddw	MUNG(OFFSETY), %mm2
 	movq	%mm2, (%ebx)    #store Y7Y6Y5Y4 
 
-	movq	ALLONE, %mm6
+	movq	MUNG(ALLONE), %mm6
 	packssdw %mm7, %mm0     #32-bit scaled U7U6U5U4 -> mm0
 
-	movq    TEMPU, %mm4     #32-bit scaled U3U2U1U0 -> mm4
+	movq    MUNG(TEMPU), %mm4     #32-bit scaled U3U2U1U0 -> mm4
 	pmaddwd	%mm6, %mm0      #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
 	
 	pmaddwd	%mm6, %mm4      #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
@@ -340,8 +342,8 @@ rgbtoycb_mmx_loop: 
 
 	psrad   $FIXPSHIFT, %mm1       #32-bit scaled V7V6 -> mm1
 	psraw	$1, %mm4 	#divide UU3 UU2 UU1 UU0 by 2 -> mm4
-		
-	movq    TEMPV, %mm5     #32-bit scaled V3V2V1V0 -> mm5
+
+	movq    MUNG(TEMPV), %mm5     #32-bit scaled V3V2V1V0 -> mm5
 
 	movq	%mm4, (%ecx)    # store U	
 
@@ -429,14 +433,15 @@ _dv_ppm_copy_y_block_mmx:
 _dv_pgm_copy_y_block_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
-	movq	OFFSETY, %mm7
+	LOAD_PIC_REG(bp)
+
+	movl    16(%esp), %edi          # dest
+	movl    20(%esp), %esi         # src
+
+	movq	MUNG(OFFSETY), %mm7
 	pxor	%mm6, %mm6
 	
 	movq	(%esi), %mm0
@@ -571,14 +578,15 @@ _dv_pgm_copy_y_block_mmx:
 _dv_video_copy_y_block_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
 
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
-	movq	OFFSETBX, %mm7
+	LOAD_PIC_REG(bp)
+
+	movl    16(%esp), %edi          # dest
+	movl    20(%esp), %esi         # src
+
+	movq	MUNG(OFFSETBX), %mm7
 	pxor	%mm6, %mm6
 	
 	movq	(%esi), %mm0
@@ -859,16 +871,16 @@ _dv_ppm_copy_pal_c_block_mmx:
 _dv_pgm_copy_pal_c_block_mmx:
 				
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
 	pushl	%ebx
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
+	LOAD_PIC_REG(bp)
+
+	movl    20(%esp), %edi          # dest
+	movl    24(%esp), %esi         # src
 
-	movq	OFFSETBX, %mm7
+	movq	MUNG(OFFSETBX), %mm7
 	pxor	%mm6, %mm6
 
 	
@@ -1007,15 +1021,16 @@ _dv_pgm_copy_pal_c_block_mmx:
 _dv_video_copy_pal_c_block_mmx:
 				
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
 	pushl	%ebx
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
-	movq	OFFSETBX, %mm7
+	LOAD_PIC_REG(bp)
+
+	movl    20(%esp), %edi          # dest
+	movl    24(%esp), %esi         # src
+
+	movq	MUNG(OFFSETBX), %mm7
 	paddw	%mm7, %mm7
 	pxor	%mm6, %mm6
 
@@ -1102,18 +1119,18 @@ video_copy_pal_c_block_mmx_loop:	
 _dv_ppm_copy_ntsc_c_block_mmx:
 				
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
 	pushl	%ebx
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
+
+	LOAD_PIC_REG(bp)
+
+	movl    20(%esp), %edi          # dest
+	movl    24(%esp), %esi         # src
 
 	movl	$4, %ebx	
 
-	movq	ALLONE, %mm6
-	
+	movq	MUNG(ALLONE), %mm6
 ppm_copy_ntsc_c_block_mmx_loop:	
 	
 	movq	(%esi), %mm0
@@ -1175,14 +1194,15 @@ ppm_copy_ntsc_c_block_mmx_loop:	
 _dv_pgm_copy_ntsc_c_block_mmx:
 				
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
 
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
-	movq	OFFSETBX, %mm7
+	LOAD_PIC_REG(bp)
+
+	movl    16(%esp), %edi          # dest
+	movl    20(%esp), %esi         # src
+
+	movq	MUNG(OFFSETBX), %mm7
 	paddw	%mm7, %mm7
 	pxor	%mm6, %mm6
 
@@ -1332,15 +1354,16 @@ _dv_pgm_copy_ntsc_c_block_mmx:
 _dv_video_copy_ntsc_c_block_mmx:
 				
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
 	pushl	%ebx
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
-	movq	OFFSETBX, %mm7
+	LOAD_PIC_REG(bp)
+
+	movl    20(%esp), %edi          # dest
+	movl    24(%esp), %esi         # src
+
+	movq	MUNG(OFFSETBX), %mm7
 	paddw	%mm7, %mm7
 	pxor	%mm6, %mm6
 
--- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S
+++ libdv-0.104/libdv/rgbtoyuv_x86_64.S
@@ -41,9 +41,6 @@
 #define DV_WIDTH_SHORT_HALF 720
 #define DV_WIDTH_BYTE_HALF  360	
 		
-.global _dv_rgbtoycb_mmx_x86_64
-# .global yuvtoycb_mmx_x86_64
-
 .data
 
 .align 8
--- libdv-0.104-old/libdv/vlc_x86.S
+++ libdv-0.104/libdv/vlc_x86.S
@@ -1,31 +1,39 @@
 	#include "asmoff.h"
 .section .note.GNU-stack, "", @progbits
+	#include "asm_common.S"
 
 .text
 	.align 4
 .globl dv_decode_vlc 
+.globl asm_dv_decode_vlc 
+.hidden asm_dv_decode_vlc
+asm_dv_decode_vlc = dv_decode_vlc
+
 	.type	 dv_decode_vlc,@function
 dv_decode_vlc:
 	pushl %ebx
+	pushl %ebp
+
+	LOAD_PIC_REG(bp)
 
-	/* Args are at 8(%esp). */
-	movl  8(%esp),%eax		/* %eax is bits */
-	movl  12(%esp),%ebx		/* %ebx is maxbits */
+	/* Args are at 12(%esp). */
+	movl  12(%esp),%eax		/* %eax is bits */
+	movl  16(%esp),%ebx		/* %ebx is maxbits */
 	andl  $0x3f,%ebx		/* limit index range STL*/
 
-	movl  dv_vlc_class_index_mask(,%ebx,4),%edx
+	movl  MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx
 	andl  %eax,%edx
-	movl  dv_vlc_class_index_rshift(,%ebx,4),%ecx
+	movl  MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx
 	sarl  %cl,%edx
-	movl  dv_vlc_classes(,%ebx,4),%ecx
+	movl  MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx
 	movsbl  (%ecx,%edx,1),%edx	/* %edx is class */
 			
-	movl  dv_vlc_index_mask(,%edx,4),%ebx
-	movl  dv_vlc_index_rshift(,%edx,4),%ecx
+	movl  MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
+	movl  MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
 	andl  %eax,%ebx
 	sarl  %cl,%ebx
 
-	movl  dv_vlc_lookups(,%edx,4),%edx
+	movl  MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
 	movl  (%edx,%ebx,4),%edx
 
 	/* Now %edx holds result, like this:
@@ -43,7 +52,7 @@ dv_decode_vlc:
 	movl  %edx,%ecx
 	sarl  $8,%ecx
 	andl  $0xff,%ecx
-	movl  sign_mask(,%ecx,4),%ebx
+	movl  MUNG_ARR(sign_mask,%ecx,4),%ebx
 	andl  %ebx,%eax
 	negl  %eax
 	sarl  $31,%eax
@@ -64,14 +73,14 @@ dv_decode_vlc:
 	    *result = broken;
 	Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
 	*/
-	movl  12(%esp),%ebx		/* %ebx is maxbits */
+	movl  16(%esp),%ebx		/* %ebx is maxbits */
 	subl  %ecx,%ebx
 	sbbl  %ebx,%ebx
 	orl   %ebx,%edx
 
-	movl  16(%esp),%eax
+	movl  20(%esp),%eax
 	movl  %edx,(%eax)
-	
+	popl  %ebp
 	popl  %ebx
 	ret
 	
@@ -81,21 +90,28 @@ dv_decode_vlc:
 	.type	 __dv_decode_vlc,@function
 __dv_decode_vlc:
 	pushl %ebx
+	pushl %ebp
+
+	LOAD_PIC_REG(bp)
 
-	/* Args are at 8(%esp). */
-	movl  8(%esp),%eax		/* %eax is bits */
+	/* Args are at 12(%esp). */
+	movl  12(%esp),%eax		/* %eax is bits */
 	
 	movl  %eax,%edx			/* %edx is class */
 	andl  $0xfe00,%edx
 	sarl  $9,%edx
+#ifdef __PIC__
+	movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
+#else
 	movsbl dv_vlc_class_lookup5(%edx),%edx
-	
-	movl  dv_vlc_index_mask(,%edx,4),%ebx
-	movl  dv_vlc_index_rshift(,%edx,4),%ecx
+#endif
+
+	movl  MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
+	movl  MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
 	andl  %eax,%ebx
 	sarl  %cl,%ebx
 
-	movl  dv_vlc_lookups(,%edx,4),%edx
+	movl  MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
 	movl  (%edx,%ebx,4),%edx
 
 	/* Now %edx holds result, like this:
@@ -113,7 +129,7 @@ __dv_decode_vlc:
 	movl  %edx,%ecx
 	sarl  $8,%ecx
 	andl  $0xff,%ecx
-	movl  sign_mask(,%ecx,4),%ecx
+	movl  MUNG_ARR(sign_mask,%ecx,4),%ecx
 	andl  %ecx,%eax
 	negl  %eax
 	sarl  $31,%eax
@@ -128,9 +144,9 @@ __dv_decode_vlc:
 	xorl  %eax,%edx
 	subl  %eax,%edx
 
-	movl  12(%esp),%eax
+	movl  16(%esp),%eax
 	movl  %edx,(%eax)
-	
+	popl  %ebp
 	popl  %ebx
 	ret
 
@@ -141,14 +157,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_
 */
 .text
 	.align	4
+.globl asm_dv_parse_ac_coeffs_pass0
+.hidden asm_dv_parse_ac_coeffs_pass0
+	asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0
+
 .globl	dv_parse_ac_coeffs_pass0
 .type	dv_parse_ac_coeffs_pass0,@function
 dv_parse_ac_coeffs_pass0:
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
 	pushl	%ebp
 
+	LOAD_PIC_REG(si)
+
 #define ARGn(N)  (20+(4*(N)))(%esp)
 
 	/*
@@ -160,8 +183,10 @@ dv_parse_ac_coeffs_pass0:
 	ebp	bl
 	*/
 	movl    ARGn(2),%ebp
+#ifndef __PIC__
 	movl	ARGn(0),%esi
 	movl	bitstream_t_buf(%esi),%esi
+#endif
 	movl	dv_block_t_offset(%ebp),%edi
 	movl	dv_block_t_reorder(%ebp),%ebx
 
@@ -171,7 +196,11 @@ dv_parse_ac_coeffs_pass0:
 	
 	movq    dv_block_t_coeffs(%ebp),%mm1
 	pxor    %mm0,%mm0
+#ifdef __PIC__
+	pand    const_f_0_0_0@GOTOFF(%esi),%mm1
+#else
 	pand    const_f_0_0_0,%mm1
+#endif
 	movq    %mm1,dv_block_t_coeffs(%ebp)
 	movq    %mm0,(dv_block_t_coeffs + 8)(%ebp)
 	movq    %mm0,(dv_block_t_coeffs + 16)(%ebp)
@@ -192,9 +221,17 @@ dv_parse_ac_coeffs_pass0:
 readloop:
 	movl	%edi,%ecx
 	shrl	$3,%ecx
+#ifdef __PIC__
+	movl    ARGn(0),%eax
+	addl    bitstream_t_buf(%eax),%ecx
+	movzbl  (%ecx),%eax
+	movzbl  1(%ecx),%edx
+	movzbl  2(%ecx),%ecx
+#else
 	movzbl  (%esi,%ecx,1),%eax
 	movzbl  1(%esi,%ecx,1),%edx
 	movzbl  2(%esi,%ecx,1),%ecx
+#endif
 	shll	$16,%eax
 	shll	$8,%edx
 	orl	%ecx,%eax
@@ -218,7 +255,11 @@ readloop:
 
 	/* Attempt to use the shortcut first.  If it hits, then
 	   this vlc term has been decoded. */
+#ifdef __PIC__
+	movl	dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
+#else
 	movl	dv_vlc_class1_shortcut(,%ecx,4),%edx
+#endif
 	test	$0x80,%edx
 	je	done_decode
 
@@ -229,12 +270,19 @@ readloop:
 	movl	%ebx,dv_block_t_reorder(%ebp)
 
 	/* %eax is bits */
-	
+#ifdef __PIC__
+	movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
+
+	movl  dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
+	movl  dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
+	movl  dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
+#else
 	movsbl dv_vlc_class_lookup5(%ecx),%ecx
 
 	movl  dv_vlc_index_mask(,%ecx,4),%ebx
 	movl  dv_vlc_lookups(,%ecx,4),%edx
 	movl  dv_vlc_index_rshift(,%ecx,4),%ecx
+#endif
 	andl  %eax,%ebx
 	sarl  %cl,%ebx
 
@@ -257,7 +305,11 @@ readloop:
 	movl  %edx,%ecx
 	sarl  $8,%ecx
 	andl  $0xff,%ecx
+#ifdef __PIC__
+	movl  sign_mask@GOTOFF(%esi,%ecx,4),%ecx
+#else
 	movl  sign_mask(,%ecx,4),%ecx
+#endif
 	andl  %ecx,%eax
 	negl  %eax
 	sarl  $31,%eax
@@ -327,10 +379,16 @@ alldone:
 
 slowpath:
 	/* slow path:	 use dv_decode_vlc */;
+#ifdef __PIC__
+	pushl	%esi
+	leal	vlc@GOTOFF(%esi),%esi
+	xchgl	%esi,(%esp)	/* last parameter is &vlc */
+#else
 	pushl	$vlc		/* last parameter is &vlc */
+#endif
 	pushl	%edx		/* bits_left */
 	pushl	%eax		/* bits */
-	call	dv_decode_vlc
+	call	asm_dv_decode_vlc
 	addl	$12,%esp
 	test	$0x80,%edx	/* If (vlc.run < 0) break */
 	jne	escape
@@ -367,6 +425,8 @@ show16:
 	pushl	%esi
 	pushl	%ebp
 
+	LOAD_PIC_REG(si)
+
 #define ARGn(N)  (20+(4*(N)))(%esp)
 
 	movl	ARGn(1),%eax			/* quality */
@@ -374,7 +435,11 @@ dv_parse_video_segment:
 	jz	its_mono
 	movl	$6,%ebx
 its_mono:
+#ifdef __PIC__
+	movl	%ebx,n_blocks@GOTOFF(%esi)
+#else
 	movl	%ebx,n_blocks
+#endif
 	
 	/*
 	 *	ebx	seg/b
@@ -385,15 +450,22 @@ its_mono:
 	 *	ebp	bl
 	 */
 	movl	ARGn(0),%ebx
+#ifndef __PIC__
 	movl	dv_videosegment_t_bs(%ebx),%esi
 	movl	bitstream_t_buf(%esi),%esi
+#endif
 	leal	dv_videosegment_t_mb(%ebx),%edi
 
 	movl	$0,%eax
 	movl	$0,%ecx
 macloop:
+#ifdef __PIC__
+	movl	%eax,m@GOTOFF(%esi)
+	movl	%ecx,mb_start@GOTOFF(%esi)
+#else
 	movl	%eax,m
 	movl	%ecx,mb_start
+#endif
 
 	movl	ARGn(0),%ebx
 	
@@ -401,7 +473,13 @@ macloop:
 	/* mb->qno = bitstream_get(bs,4); */
 	movl	%ecx,%edx
 	shr	$3,%edx
+#ifdef __PIC__
+	movl	dv_videosegment_t_bs(%ebx),%ecx
+	movl	bitstream_t_buf(%ecx),%ecx
+	movzbl	3(%ecx,%edx,1),%edx
+#else
 	movzbl	3(%esi,%edx,1),%edx
+#endif
 	andl	$0xf,%edx
 	movl	%edx,dv_macroblock_t_qno(%edi)
 
@@ -412,7 +490,11 @@ macloop:
 	movl	%edx,dv_macroblock_t_eob_count(%edi)
 
 	/* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
+#ifdef __PIC__
+	movl	dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
+#else
 	movl	dv_super_map_vertical(,%eax,4),%edx
+#endif
 	movl	dv_videosegment_t_i(%ebx),%ecx
 	addl	%ecx,%edx
 
@@ -423,11 +505,20 @@ skarly:	
 	andl	$1,%ecx
 	shll	$5,%ecx		/* ecx = (isPAL ? 32 : 0) */
 
+#ifdef __PIC__
+	leal	mod_10@GOTOFF(%esi),%edx
+	movzbl	(%edx,%ecx,1),%edx	/* uses mod_12 for PAL */
+#else
 	movzbl	mod_10(%edx,%ecx,1),%edx	/* uses mod_12 for PAL */
+#endif
 	movl	%edx,dv_macroblock_t_i(%edi)
 
 	/*  mb->j = dv_super_map_horizontal[m]; */	
+#ifdef __PIC__
+	movl	dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
+#else
 	movl	dv_super_map_horizontal(,%eax,4),%edx
+#endif
 	movl	%edx,dv_macroblock_t_j(%edi)
 
 	/* mb->k = seg->k; */
@@ -446,12 +537,28 @@ blkloop:
 	        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 	*/
 	/* dc = bitstream_get(bs,9); */
+#ifdef __PIC__
+	movl	mb_start@GOTOFF(%esi),%ecx
+#else
 	movl	mb_start,%ecx
+#endif
 	shr	$3,%ecx
+#ifdef __PIC__
+	movzbl	blk_start@GOTOFF(%esi,%ebx),%edx
+#else
 	movzbl	blk_start(%ebx),%edx
+#endif
 	addl	%ecx,%edx
+#ifdef __PIC__
+	movl	ARGn(0),%ecx
+	movl	dv_videosegment_t_bs(%ecx),%ecx
+	movl	bitstream_t_buf(%ecx),%ecx
+	movzbl	(%ecx,%edx,1),%eax	/* hi byte */
+	movzbl	1(%ecx,%edx,1),%ecx	/* lo byte */
+#else
 	movzbl	(%esi,%edx,1),%eax	/* hi byte */
 	movzbl	1(%esi,%edx,1),%ecx	/* lo byte */
+#endif
 	shll	$8,%eax
 	orl	%ecx,%eax
 
@@ -478,7 +585,11 @@ blkloop:
 
 	/* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
 	shll	$6,%eax
+#ifdef __PIC__
+	leal	dv_reorder@GOTOFF+1(%esi,%eax),%eax
+#else
 	addl	$(dv_reorder+1),%eax
+#endif
 	movl	%eax,dv_block_t_reorder(%ebp)
 
 	/* bl->reorder_sentinel = bl->reorder + 63; */
@@ -486,13 +597,22 @@ blkloop:
 	movl	%eax,dv_block_t_reorder_sentinel(%ebp)
 
 	/* bl->offset= mb_start + dv_parse_bit_start[b]; */
+#ifdef __PIC__
+	movl	mb_start@GOTOFF(%esi),%ecx
+	movl	dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
+#else
 	movl	mb_start,%ecx
 	movl	dv_parse_bit_start(,%ebx,4),%eax
+#endif
 	addl	%ecx,%eax
 	movl	%eax,dv_block_t_offset(%ebp)
 
 	/* bl->end= mb_start + dv_parse_bit_end[b]; */
+#ifdef __PIC__
+	movl	dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
+#else
 	movl	dv_parse_bit_end(,%ebx,4),%eax
+#endif
 	addl	%ecx,%eax
 	movl	%eax,dv_block_t_end(%ebp)
 
@@ -504,7 +624,11 @@ blkloop:
 	/* no AC pass.  Just zero out the remaining coeffs */
 	movq    dv_block_t_coeffs(%ebp),%mm1
 	pxor    %mm0,%mm0
+#ifdef __PIC__
+	pand    const_f_0_0_0@GOTOFF(%esi),%mm1
+#else
 	pand    const_f_0_0_0,%mm1
+#endif
 	movq    %mm1,dv_block_t_coeffs(%ebp)
 	movq    %mm0,(dv_block_t_coeffs + 8)(%ebp)
 	movq    %mm0,(dv_block_t_coeffs + 16)(%ebp)
@@ -529,18 +653,27 @@ do_ac_pass:
 	pushl	%ebp
 	pushl	%edi
 	pushl	%eax
-	call	dv_parse_ac_coeffs_pass0
+	call	asm_dv_parse_ac_coeffs_pass0
 	addl	$12,%esp
 done_ac:
 
+#ifdef __PIC__
+	movl	n_blocks@GOTOFF(%esi),%eax
+#else
 	movl	n_blocks,%eax
+#endif
 	addl	$dv_block_t_size,%ebp
 	incl	%ebx
 	cmpl	%eax,%ebx
 	jnz	blkloop
 
+#ifdef __PIC__
+	movl	m@GOTOFF(%esi),%eax
+	movl	mb_start@GOTOFF(%esi),%ecx
+#else
 	movl	m,%eax
 	movl	mb_start,%ecx
+#endif
 	addl	$(8 * 80),%ecx
 	addl	$dv_macroblock_t_size,%edi
 	incl	%eax
@@ -558,7 +691,7 @@ done_ac:
 
 	andl	$DV_QUALITY_AC_MASK,%eax
 	cmpl	$DV_QUALITY_AC_2,%eax
-	jz	dv_parse_ac_coeffs
+	jz	asm_dv_parse_ac_coeffs
 	movl	$0,%eax
 	ret