Patch downloaded from
http://bugs.gentoo.org/show_bug.cgi?id=121871
http://bugs.gentoo.org/attachment.cgi?id=98094

--- libdv-0.104-old/libdv/asm_common.S
+++ libdv-0.104/libdv/asm_common.S
@@ -0,0 +1,29 @@
+/* public domain, do what you want */
+
+#ifdef __PIC__
+# define MUNG(sym)                 sym##@GOTOFF(%ebp)
+# define MUNG_ARR(sym, args...)    sym##@GOTOFF(%ebp,##args)
+#else
+# define MUNG(sym)                 sym
+# define MUNG_ARR(sym, args...)    sym(,##args)
+#endif
+
+#ifdef __PIC__
+# undef __i686 /* gcc define gets in our way */
+# define LOAD_PIC_REG(reg) \
+	.ifndef  __i686.get_pc_thunk.reg; \
+	.section .gnu.linkonce.t.__i686.get_pc_thunk.reg,"ax",@progbits; \
+	.global  __i686.get_pc_thunk.reg; \
+	.hidden  __i686.get_pc_thunk.reg; \
+	.type    __i686.get_pc_thunk.reg,@function; \
+	__i686.get_pc_thunk.reg: \
+	movl (%esp), %e##reg; \
+	ret; \
+	.size __i686.get_pc_thunk.reg,.-__i686.get_pc_thunk.reg; \
+	.previous; \
+	.endif; \
+	call __i686.get_pc_thunk.reg; \
+	addl $_GLOBAL_OFFSET_TABLE_, %e##reg
+#else
+# define LOAD_PIC_REG(reg)
+#endif
--- libdv-0.104-old/libdv/dct_block_mmx.S
+++ libdv-0.104/libdv/dct_block_mmx.S
@@ -55,19 +55,22 @@ scratch2:       .quad 0
 
 .section .note.GNU-stack, "", @progbits
 
+#include "asm_common.S"
+
 .text
 
 .align 8	
 .global _dv_dct_88_block_mmx
 .hidden _dv_dct_88_block_mmx
 .type   _dv_dct_88_block_mmx,@function
 _dv_dct_88_block_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 
-	movl    8(%ebp), %esi          # source
+	LOAD_PIC_REG(bp)
+
+	movl    12(%esp), %esi          # source
 
 # column 0
 	movq 16*0(%esi), %mm0          # v0
@@ -88,22 +93,22 @@ _dv_dct_88_block_mmx:
 
 	movq 16*3(%esi), %mm5          # v3
 	movq 16*4(%esi), %mm7          # v4
-	movq  %mm7, scratch1           # scratch1: v4   ; 
+	movq  %mm7, MUNG(scratch1)     # scratch1: v4   ; 
 	movq  %mm5, %mm7               # duplicate v3 
-	paddw scratch1, %mm5           # v03: v3+v4  
-	psubw scratch1, %mm7           # v04: v3-v4  
-	movq  %mm5, scratch2           # scratch2: v03
+	paddw MUNG(scratch1), %mm5     # v03: v3+v4  
+	psubw MUNG(scratch1), %mm7     # v04: v3-v4  
+	movq  %mm5, MUNG(scratch2)     # scratch2: v03
 	movq  %mm0, %mm5               # mm5: v00
 
-	paddw scratch2, %mm0           # v10: v00+v03   
-	psubw scratch2, %mm5           # v13: v00-v03   
-	movq  %mm3, scratch3           # scratch3: v02
+	paddw MUNG(scratch2), %mm0     # v10: v00+v03   
+	psubw MUNG(scratch2), %mm5     # v13: v00-v03   
+	movq  %mm3, MUNG(scratch3)     # scratch3: v02
 	movq  %mm1, %mm3               # duplicate v01
 
-	paddw scratch3, %mm1          # v11: v01+v02
-	psubw scratch3, %mm3          # v12: v01-v02
+	paddw MUNG(scratch3), %mm1    # v11: v01+v02
+	psubw MUNG(scratch3), %mm3    # v12: v01-v02
 
-	movq  %mm6, scratch4           # scratch4: v05
+	movq  %mm6, MUNG(scratch4)     # scratch4: v05
 	movq  %mm0, %mm6               # duplicate v10
 
 	paddw %mm1, %mm0              # v10+v11
@@ -113,10 +118,10 @@ _dv_dct_88_block_mmx:
 	movq  %mm6, 16*4(%esi)         # out4: v10-v11 
 
 	movq  %mm4, %mm0               # mm0: v06
-	paddw scratch4, %mm4          # v15: v05+v06 
+	paddw MUNG(scratch4), %mm4    # v15: v05+v06 
 	paddw  %mm2, %mm0             # v16: v07+v06
 
-	pmulhw WA3, %mm4               # v35~: WA3*v15
+	pmulhw MUNG(WA3), %mm4         # v35~: WA3*v15
 	psllw  $1, %mm4                # v35: compensate the coeefient scale
 
 	movq   %mm4, %mm6              # duplicate v35
@@ -125,7 +130,7 @@ _dv_dct_88_block_mmx:
 
 	paddw  %mm5, %mm3             # v22: v12+v13
 
-	pmulhw WA1, %mm3               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm3         # v32~: WA1*v22
 	psllw  $16-NSHIFT, %mm3        # v32: compensate the coeefient scale
 	movq   %mm5, %mm6              # duplicate v13
 
@@ -136,13 +141,13 @@ _dv_dct_88_block_mmx:
 	movq  %mm6, 16*6(%esi)         # out6: v13-v32 
 
 
-	paddw  scratch4, %mm7         # v14n: v04+v05
+	paddw  MUNG(scratch4), %mm7   # v14n: v04+v05
 	movq   %mm0, %mm5              # duplicate v16
 
 	psubw  %mm7, %mm0             # va1: v16-v14n
-	pmulhw WA5, %mm0               # va0~:  va1*WA5
-	pmulhw WA4, %mm5               # v36~~: v16*WA4
-	pmulhw WA2, %mm7               # v34~~: v14n*WA2
+	pmulhw MUNG(WA5), %mm0         # va0~:  va1*WA5
+	pmulhw MUNG(WA4), %mm5         # v36~~: v16*WA4
+	pmulhw MUNG(WA2), %mm7         # v34~~: v14n*WA2
 	psllw  $16-WA4_SHIFT, %mm5     # v36: compensate the coeefient scale 
 	psllw  $16-NSHIFT, %mm7        # v34: compensate the coeefient scale
 
@@ -190,22 +195,22 @@ _dv_dct_88_block_mmx:
 
 	movq 16*3(%esi), %mm5              # v3
 	movq 16*4(%esi), %mm7              # v4
-	movq  %mm7, scratch1                    # scratch1: v4   ; 
+	movq  %mm7, MUNG(scratch1)     # scratch1: v4   ; 
 	movq  %mm5, %mm7               # duplicate v3 
-	paddw scratch1, %mm5           # v03: v3+v4  
-	psubw scratch1, %mm7           # v04: v3-v4  
-	movq  %mm5, scratch2        # scratch2: v03
+	paddw MUNG(scratch1), %mm5     # v03: v3+v4  
+	psubw MUNG(scratch1), %mm7     # v04: v3-v4  
+	movq  %mm5, MUNG(scratch2)     # scratch2: v03
 	movq  %mm0, %mm5               # mm5: v00
 
-	paddw scratch2, %mm0           # v10: v00+v03   
-	psubw scratch2, %mm5           # v13: v00-v03   
-	movq  %mm3, scratch3         # scratc3: v02
+	paddw MUNG(scratch2), %mm0     # v10: v00+v03   
+	psubw MUNG(scratch2), %mm5     # v13: v00-v03   
+	movq  %mm3, MUNG(scratch3)     # scratc3: v02
 	movq  %mm1, %mm3               # duplicate v01
 
-	paddw scratch3, %mm1           # v11: v01+v02
-	psubw scratch3, %mm3           # v12: v01-v02
+	paddw MUNG(scratch3), %mm1     # v11: v01+v02
+	psubw MUNG(scratch3), %mm3     # v12: v01-v02
 
-	movq  %mm6, scratch4         # scratc4: v05
+	movq  %mm6, MUNG(scratch4)     # scratc4: v05
 	movq  %mm0, %mm6               # duplicate v10
 
 	paddw %mm1, %mm0                            # v10+v11
@@ -215,10 +220,10 @@ _dv_dct_88_block_mmx:
 	movq  %mm6, 16*4(%esi)          # out4: v10-v11 
 
 	movq  %mm4, %mm0             # mm0: v06
-	paddw scratch4, %mm4         # v15: v05+v06 
+	paddw MUNG(scratch4), %mm4     # v15: v05+v06 
 	paddw  %mm2, %mm0                       # v16: v07+v06
 
-	pmulhw WA3, %mm4           # v35~: WA3*v15
+	pmulhw MUNG(WA3), %mm4         # v35~: WA3*v15
 	psllw  $16-NSHIFT, %mm4       # v35: compensate the coeefient scale
 
 	movq   %mm4, %mm6            # duplicate v35
@@ -227,7 +232,7 @@ _dv_dct_88_block_mmx:
 
 	paddw  %mm5, %mm3            # v22: v12+v13
 
-	pmulhw WA1, %mm3           # v32~: WA3*v15
+	pmulhw MUNG(WA1), %mm3         # v32~: WA3*v15
 	psllw  $16-NSHIFT, %mm3       # v32: compensate the coeefient scale
 	movq   %mm5, %mm6            # duplicate v13
 
@@ -237,13 +242,13 @@ _dv_dct_88_block_mmx:
 	movq  %mm5, 16*2(%esi)          # out2: v13+v32 
 	movq  %mm6, 16*6(%esi)          # out6: v13-v32 
 
-	paddw  scratch4, %mm7                           # v14n: v04+v05
+	paddw  MUNG(scratch4), %mm7     # v14n: v04+v05
 	movq   %mm0, %mm5                               # duplicate v16
 
 	psubw  %mm7, %mm0                               # va1: v16-v14n
-	pmulhw WA2, %mm7                # v34~~: v14n*WA2
-	pmulhw WA5, %mm0                # va0~:  va1*WA5
-	pmulhw WA4, %mm5                        # v36~~: v16*WA4
+	pmulhw MUNG(WA2), %mm7          # v34~~: v14n*WA2
+	pmulhw MUNG(WA5), %mm0          # va0~:  va1*WA5
+	pmulhw MUNG(WA4), %mm5          # v36~~: v16*WA4
 	psllw  $16-NSHIFT, %mm7
 	psllw  $16-WA4_SHIFT, %mm5      # v36: compensate the coeffient 
 		# scale note that WA4 is shifted 1 bit less than the others
@@ -755,11 +762,12 @@ _dv_dct_block_mmx_postscale_88:
 _dv_dct_248_block_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl   %edi
 
-	movl    8(%ebp), %esi          # source
+	LOAD_PIC_REG(bp)
+
+	movl    16(%esp), %esi          # source
 
 # column 0
 
@@ -781,7 +791,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
@@ -820,7 +830,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
@@ -857,7 +867,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
@@ -894,7 +904,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
--- libdv-0.104-old/libdv/dv.c
+++ libdv-0.104/libdv/dv.c
@@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp
 } /* dv_reconfigure */
 
 
+extern uint8_t dv_quant_offset[4];
+extern uint8_t dv_quant_shifts[22][4];
+
 static inline void 
 dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
   int i;
@@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d
       dv_idct_248 (co248, mb->b[i].coeffs);
     } else {
 #if ARCH_X86
-      _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
+      _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
       _dv_idct_88(mb->b[i].coeffs);
 #elif ARCH_X86_64
       _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
@@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv
 	dv_idct_248 (co248, mb->b[b].coeffs);
       } else {
 #if ARCH_X86
-	_dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
+	_dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
 	_dv_weight_88_inverse(bl->coeffs);
 	_dv_idct_88(bl->coeffs);
 #elif ARCH_X86_64
--- libdv-0.104-old/libdv/encode.c
+++ libdv-0.104/libdv/encode.c
@@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl
 }
 
 extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
-					  dv_vlc_entry_t ** out);
+					  dv_vlc_entry_t ** out,
+					  dv_vlc_entry_t * lookup);
 
 extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
 					  dv_vlc_entry_t ** out);
@@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv
 #elif ARCH_X86
 	int num_bits;
 
-	num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
+	num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
 	emms();
 #else
 	int num_bits;
@@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv
 	return num_bits;
 }
 
-extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
+extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
 extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
 
 extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
@@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl
 #elif ARCH_X86_64
 	return _dv_vlc_num_bits_block_x86_64(coeffs);
 #else
-	return _dv_vlc_num_bits_block_x86(coeffs);
+	return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
 #endif
 }
 
--- libdv-0.104-old/libdv/encode_x86.S
+++ libdv-0.104/libdv/encode_x86.S
@@ -23,9 +23,6 @@
  *  The libdv homepage is http://libdv.sourceforge.net/.  
  */
 
-.data
-ALLONE:		.word 1,1,1,1
-VLCADDMASK:	.byte 255,0,0,0,255,0,0,0
 		
 
 .section .note.GNU-stack, "", @progbits
@@ -49,11 +47,14 @@ _dv_vlc_encode_block_mmx:	
 
 	movl	$63, %ecx
 
-	movl	vlc_encode_lookup, %esi
+	movl	4+4*4+8(%esp), %esi              # vlc_encode_lookup
 
 	pxor	%mm0, %mm0
 	pxor	%mm2, %mm2
-	movq	VLCADDMASK, %mm1
+	pushl	$0x000000FF                      # these four lines
+	pushl	$0x000000FF                      # load VLCADDMASK
+	movq	(%esp), %mm1                     # into %mm1 off the stack
+	addl	$8, %esp                         #  --> no TEXTRELs
 	xorl	%ebp, %ebp
 	subl	$8, %edx
 vlc_encode_block_mmx_loop:
@@ -125,7 +128,7 @@ _dv_vlc_num_bits_block_x86:	
 	addl	$2, %edi
 
 	movl	$63, %ecx
-	movl	vlc_num_bits_lookup, %esi
+	movl	4+4*4+4(%esp), %esi              # vlc_num_bits_lookup
 	
 vlc_num_bits_block_x86_loop:
 	movw	(%edi), %ax
@@ -583,8 +594,11 @@ _dv_need_dct_248_mmx_rows:
 	paddw	%mm5, %mm1
 
 	paddw	%mm1, %mm0
-	
-	pmaddwd	ALLONE, %mm0	
+
+	pushl	$0x00010001              # these four lines
+	pushl	$0x00010001              # load ALLONE
+	pmaddwd	(%esp), %mm0             # into %mm0 off the stack
+	addl	$8, %esp                 #  --> no TEXTRELs
 	movq	%mm0, %mm1
 	psrlq	$32, %mm1
 	paddd	%mm1, %mm0
--- libdv-0.104-old/libdv/idct_block_mmx.S
+++ libdv-0.104/libdv/idct_block_mmx.S
@@ -8,17 +8,21 @@
 
 .section .note.GNU-stack, "", @progbits
 
+#include "asm_common.S"
+
 .text
 	.align 4
 .global _dv_idct_block_mmx
 .hidden _dv_idct_block_mmx
 .type   _dv_idct_block_mmx,@function
 _dv_idct_block_mmx:
 	pushl	 %ebp
-	movl	 %esp,%ebp
 	pushl	 %esi
-	leal	 preSC, %ecx
-	movl	 8(%ebp),%esi		/* source matrix */
+
+	LOAD_PIC_REG(bp)
+
+	leal	 MUNG(preSC), %ecx
+	movl	 12(%esp),%esi		/* source matrix */
 
 /* 
  *	column 0: even part
@@ -34,7 +40,7 @@ _dv_idct_block_mmx:
 	movq %mm1, %mm2			/* added 11/1/96 */
 	pmulhw 8*8(%esi),%mm5		/* V8 */
 	psubsw %mm0, %mm1		/* V16 */
-	pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V18 */
+	pmulhw MUNG(x5a825a825a825a82), %mm1	/* 23170 ->V18 */
 	paddsw %mm0, %mm2		/* V17 */
 	movq %mm2, %mm0			/* duplicate V17 */
 	psraw $1, %mm2			/* t75=t82 */
@@ -75,7 +81,7 @@ _dv_idct_block_mmx:
 	paddsw %mm0, %mm3		/* V29 ; free mm0 */
 	movq %mm7, %mm1			/* duplicate V26 */
 	psraw $1, %mm3			/* t91=t94 */
-	pmulhw x539f539f539f539f,%mm7	/* V33 */
+	pmulhw MUNG(x539f539f539f539f),%mm7	/* V33 */
 	psraw $1, %mm1			/* t96 */
 	movq %mm5, %mm0			/* duplicate V2 */
 	psraw $2, %mm4			/* t85=t87 */
@@ -83,15 +89,15 @@ _dv_idct_block_mmx:
 	psubsw %mm4, %mm0		/* V28 ; free mm4 */
 	movq %mm0, %mm2			/* duplicate V28 */
 	psraw $1, %mm5			/* t90=t93 */
-	pmulhw x4546454645464546,%mm0	/* V35 */
+	pmulhw MUNG(x4546454645464546),%mm0	/* V35 */
 	psraw $1, %mm2			/* t97 */
 	movq %mm5, %mm4			/* duplicate t90=t93 */
 	psubsw %mm2, %mm1		/* V32 ; free mm2 */
-	pmulhw x61f861f861f861f8,%mm1	/* V36 */
+	pmulhw MUNG(x61f861f861f861f8),%mm1	/* V36 */
 	psllw $1, %mm7			/* t107 */
 	paddsw %mm3, %mm5		/* V31 */
 	psubsw %mm3, %mm4		/* V30 ; free mm3 */
-	pmulhw x5a825a825a825a82,%mm4	/* V34 */
+	pmulhw MUNG(x5a825a825a825a82),%mm4	/* V34 */
 	nop
 	psubsw %mm1, %mm0		/* V38 */
 	psubsw %mm7, %mm1		/* V37 ; free mm7 */
@@ -158,7 +164,7 @@ _dv_idct_block_mmx:
 	psubsw %mm7, %mm1		/* V50 */
 	pmulhw 8*9(%esi), %mm5		/* V9 */
 	paddsw %mm7, %mm2		/* V51 */
-	pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V52 */
+	pmulhw MUNG(x5a825a825a825a82), %mm1	/* 23170 ->V52 */
 	movq %mm2, %mm6			/* duplicate V51 */
 	psraw $1, %mm2			/* t138=t144 */
 	movq %mm3, %mm4			/* duplicate V1 */
@@ -199,11 +205,11 @@ _dv_idct_block_mmx:
  * even more by doing the correction step in a later stage when the number
 * is actually multiplied by 16
 */
-	paddw x0005000200010001, %mm4
+	paddw MUNG(x0005000200010001), %mm4
 	psubsw %mm6, %mm3		/* V60 ; free mm6 */
 	psraw $1, %mm0			/* t154=t156 */
 	movq %mm3, %mm1			/* duplicate V60 */
-	pmulhw x539f539f539f539f, %mm1	/* V67 */
+	pmulhw MUNG(x539f539f539f539f), %mm1	/* V67 */
 	movq %mm5, %mm6			/* duplicate V3 */
 	psraw $2, %mm4			/* t148=t150 */
 	paddsw %mm4, %mm5		/* V61 */
@@ -212,13 +218,13 @@ _dv_idct_block_mmx:
 	psllw $1, %mm1			/* t169 */
 	paddsw %mm0, %mm5		/* V65 -> result */
 	psubsw %mm0, %mm4		/* V64 ; free mm0 */
-	pmulhw x5a825a825a825a82, %mm4	/* V68 */
+	pmulhw MUNG(x5a825a825a825a82), %mm4	/* V68 */
 	psraw $1, %mm3			/* t158 */
 	psubsw %mm6, %mm3		/* V66 */
 	movq %mm5, %mm2			/* duplicate V65 */
-	pmulhw x61f861f861f861f8, %mm3	/* V70 */
+	pmulhw MUNG(x61f861f861f861f8), %mm3	/* V70 */
 	psllw $1, %mm6			/* t165 */
-	pmulhw x4546454645464546, %mm6	/* V69 */
+	pmulhw MUNG(x4546454645464546), %mm6	/* V69 */
 	psraw $1, %mm2			/* t172 */
 /* moved from next block */
 	movq 8*5(%esi), %mm0		/* V56 */
@@ -343,7 +349,7 @@ _dv_idct_block_mmx:
 *	movq 8*13(%esi), %mm4		tmt13
 */
 	psubsw %mm4, %mm3		/* V134 */
-	pmulhw x5a825a825a825a82, %mm3	/* 23170 ->V136 */
+	pmulhw MUNG(x5a825a825a825a82), %mm3	/* 23170 ->V136 */
 	movq 8*9(%esi), %mm6		/* tmt9 */
 	paddsw %mm4, %mm5		/* V135 ; mm4 free */
 	movq %mm0, %mm4			/* duplicate tmt1 */
@@ -372,17 +378,17 @@ _dv_idct_block_mmx:
 	psubsw %mm7, %mm0		/* V144 */
 	movq %mm0, %mm3			/* duplicate V144 */
 	paddsw %mm7, %mm2		/* V147 ; free mm7 */
-	pmulhw x539f539f539f539f, %mm0	/* 21407-> V151 */
+	pmulhw MUNG(x539f539f539f539f), %mm0	/* 21407-> V151 */
 	movq %mm1, %mm7			/* duplicate tmt3 */
 	paddsw %mm5, %mm7		/* V145 */
 	psubsw %mm5, %mm1		/* V146 ; free mm5 */
 	psubsw %mm1, %mm3		/* V150 */
 	movq %mm7, %mm5			/* duplicate V145 */
-	pmulhw x4546454645464546, %mm1	/* 17734-> V153 */
+	pmulhw MUNG(x4546454645464546), %mm1	/* 17734-> V153 */
 	psubsw %mm2, %mm5		/* V148 */
-	pmulhw x61f861f861f861f8, %mm3	/* 25080-> V154 */
+	pmulhw MUNG(x61f861f861f861f8), %mm3	/* 25080-> V154 */
 	psllw $2, %mm0			/* t311 */
-	pmulhw x5a825a825a825a82, %mm5	/* 23170-> V152 */
+	pmulhw MUNG(x5a825a825a825a82), %mm5	/* 23170-> V152 */
 	paddsw %mm2, %mm7		/* V149 ; free mm2 */
 	psllw $1, %mm1			/* t313 */
 	nop	/* without the nop - freeze here for one clock */
@@ -408,7 +414,7 @@ _dv_idct_block_mmx:
 	paddsw %mm3, %mm6		/* V164 ; free mm3 */
 	movq %mm4, %mm3			/* duplicate V142 */
 	psubsw %mm5, %mm4		/* V165 ; free mm5 */
-	movq %mm2, scratch7		/* out7 */
+	movq %mm2, MUNG(scratch7)		/* out7 */
 	psraw $4, %mm6
 	psraw $4, %mm4
 	paddsw %mm5, %mm3		/* V162 */
@@ -419,11 +425,11 @@ _dv_idct_block_mmx:
 */
 	movq %mm6, 8*9(%esi)		/* out9 */
 	paddsw %mm1, %mm0		/* V161 */
-	movq %mm3, scratch5		/* out5 */
+	movq %mm3, MUNG(scratch5)		/* out5 */
 	psubsw %mm1, %mm5		/* V166 ; free mm1 */
 	movq %mm4, 8*11(%esi)		/* out11 */
 	psraw $4, %mm5
-	movq %mm0, scratch3		/* out3 */
+	movq %mm0, MUNG(scratch3)		/* out3 */
 	movq %mm2, %mm4			/* duplicate V140 */
 	movq %mm5, 8*13(%esi)		/* out13 */
 	paddsw %mm7, %mm2		/* V160 */
@@ -433,7 +439,7 @@ _dv_idct_block_mmx:
 /* moved from the next block */
 	movq 8*3(%esi), %mm7
 	psraw $4, %mm4
-	movq %mm2, scratch1		/* out1 */
+	movq %mm2, MUNG(scratch1)		/* out1 */
 /* moved from the next block */
 	movq %mm0, %mm1
 	movq %mm4, 8*15(%esi)		/* out15 */
@@ -490,15 +496,15 @@ _dv_idct_block_mmx:
 	paddsw %mm4, %mm3		/* V113 ; free mm4 */
 	movq %mm0, %mm4			/* duplicate V110 */
 	paddsw %mm1, %mm2		/* V111 */
-	pmulhw x539f539f539f539f, %mm0	/* 21407-> V117 */
+	pmulhw MUNG(x539f539f539f539f), %mm0	/* 21407-> V117 */
 	psubsw %mm1, %mm5		/* V112 ; free mm1 */
 	psubsw %mm5, %mm4		/* V116 */
 	movq %mm2, %mm1			/* duplicate V111 */
-	pmulhw x4546454645464546, %mm5	/* 17734-> V119 */
+	pmulhw MUNG(x4546454645464546), %mm5	/* 17734-> V119 */
 	psubsw %mm3, %mm2		/* V114 */
-	pmulhw x61f861f861f861f8, %mm4	/* 25080-> V120 */
+	pmulhw MUNG(x61f861f861f861f8), %mm4	/* 25080-> V120 */
 	paddsw %mm3, %mm1		/* V115 ; free mm3 */
-	pmulhw x5a825a825a825a82, %mm2	/* 23170-> V118 */
+	pmulhw MUNG(x5a825a825a825a82), %mm2	/* 23170-> V118 */
 	psllw $2, %mm0			/* t266 */
 	movq %mm1, (%esi)		/* save V115 */
 	psllw $1, %mm5			/* t268 */
@@ -516,7 +522,7 @@ _dv_idct_block_mmx:
 	movq %mm6, %mm3			/* duplicate tmt4 */
 	psubsw %mm0, %mm6		/* V100 */
 	paddsw %mm0, %mm3		/* V101 ; free mm0 */
-	pmulhw x5a825a825a825a82, %mm6	/* 23170 ->V102 */
+	pmulhw MUNG(x5a825a825a825a82), %mm6	/* 23170 ->V102 */
 	movq %mm7, %mm5			/* duplicate tmt0 */
 	movq 8*8(%esi), %mm1		/* tmt8 */
 	paddsw %mm1, %mm7		/* V103 */
@@ -550,10 +556,10 @@ _dv_idct_block_mmx:
 	movq 8*2(%esi), %mm3		/* V123 */
 	paddsw %mm4, %mm7		/* out0 */
 /* moved up from next block */
-	movq scratch3, %mm0
+	movq MUNG(scratch3), %mm0
 	psraw $4, %mm7
 /* moved up from next block */
-	movq scratch5, %mm6 
+	movq MUNG(scratch5), %mm6 
 	psubsw %mm4, %mm1		/* out14 ; free mm4 */
 	paddsw %mm3, %mm5		/* out2 */
 	psraw $4, %mm1
@@ -564,7 +570,7 @@ _dv_idct_block_mmx:
 	movq %mm5, 8*2(%esi)		/* out2 ; free mm5 */
 	psraw $4, %mm2
 /* moved up to the prev block */
-	movq scratch7, %mm4
+	movq MUNG(scratch7), %mm4
 /* moved up to the prev block */
 	psraw $4, %mm0
 	movq %mm2, 8*12(%esi)		/* out12 ; free mm2 */
@@ -578,7 +584,7 @@ _dv_idct_block_mmx:
 *	psraw $4, %mm0
 *	psraw $4, %mm6
 */
-	movq scratch1, %mm1
+	movq MUNG(scratch1), %mm1
 	psraw $4, %mm4
 	movq %mm0, 8*3(%esi)		/* out3 */
 	psraw $4, %mm1
--- libdv-0.104-old/libdv/parse.c
+++ libdv-0.104/libdv/parse.c
@@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se
   exit(0);
 #endif
 } /* dv_parse_ac_coeffs */
+#if defined __GNUC__ && __ELF__
+# define dv_strong_hidden_alias(name, aliasname) \
+    extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden")))
+dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs);
+#else
+int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); }
+#endif
 
 /* ---------------------------------------------------------------------------
 */
--- libdv-0.104-old/libdv/quant.c
+++ libdv-0.104/libdv/quant.c
@@ -144,7 +144,7 @@ uint8_t  dv_quant_offset[4] = { 6,3,0,1 
 uint32_t	dv_quant_248_mul_tab [2] [22] [64];
 uint32_t dv_quant_88_mul_tab [2] [22] [64];
 
-extern void             _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
+extern void             _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t *dv_quant_offset,uint8_t *dv_quant_shifts);
 extern void             _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
 static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
 static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
@@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno
 		_dv_quant_x86_64(block, qno, klass);
 		emms();
 #else
-		_dv_quant_x86(block, qno, klass);
+		_dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
 		emms();
 #endif
 	}
--- libdv-0.104-old/libdv/quant.h
+++ libdv-0.104/libdv/quant.h
@@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block,
 extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
 extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
                                   dv_248_coeff_t *co);
-extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
+extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t *offset, uint8_t *shifts);
 extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
 extern void dv_quant_init (void);
 #ifdef __cplusplus
--- libdv-0.104-old/libdv/quant_x86.S
+++ libdv-0.104/libdv/quant_x86.S
@@ -73,10 +75,13 @@ _dv_quant_88_inverse_x86:	
 	
 	/*  pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
 	movl	ARGn(1),%eax	/* qno */
+	movl	ARGn(3),%ebx	/* dv_quant_offset */
+	addl	ARGn(2),%ebx	/* class */
+	movzbl	(%ebx),%ecx
 	movl	ARGn(2),%ebx	/* class */
-	movzbl	dv_quant_offset(%ebx),%ecx
 	addl	%ecx,%eax
-	leal	dv_quant_shifts(,%eax,4),%edx	/* edx is pq */
+	movl	ARGn(4),%edx	/* dv_quant_shifts */
+	leal	(%edx,%eax,4),%edx	/* edx is pq */
 
 	/* extra = (class == 3); */
 				/*  0   1   2   3 */
@@ -214,11 +221,13 @@ _dv_quant_x86:	
 	
 	/*  pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
 	movl	ARGn(1),%eax	/* qno */
+	movl	ARGn(3),%ebx	/* offset */
+	addl	ARGn(2),%ebx	/* class */
+	movzbl	(%ebx),%ecx
 	movl	ARGn(2),%ebx	/* class */
-
-	movzbl	dv_quant_offset(%ebx),%ecx
+	movl	ARGn(4),%edx	/* shifts */
 	addl	%ecx,%eax
-	leal	dv_quant_shifts(,%eax,4),%edx	/* edx is pq */
+	leal	(%edx,%eax,4),%edx	/* edx is pq */
 
 	/* extra = (class == 3); */
 				/*  0   1   2   3 */
--- libdv-0.104-old/libdv/rgbtoyuv.S
+++ libdv-0.104/libdv/rgbtoyuv.S
@@ -41,9 +41,6 @@
 #define DV_WIDTH_SHORT_HALF 720
 #define DV_WIDTH_BYTE_HALF  360	
 		
-.global _dv_rgbtoycb_mmx
-# .global yuvtoycb_mmx
-
 .data
 
 .align 8
@@ -110,25 +107,26 @@ VR0GR:  .long   0,0
 VBG0B:  .long   0,0
 	
 #endif	
-	
+
+#include "asm_common.S"
+
 .section .note.GNU-stack, "", @progbits
 
 .text
 
-#define _inPtr     8
-#define _rows      12
-#define _columns   16
-#define _outyPtr   20
-#define _outuPtr   24
-#define _outvPtr   28
+#define _inPtr     24+8
+#define _rows      24+12
+#define _columns   24+16
+#define _outyPtr   24+20
+#define _outuPtr   24+24
+#define _outvPtr   24+28
 
 .global _dv_rgbtoycb_mmx
 .hidden _dv_rgbtoycb_mmx
 .type   _dv_rgbtoycb_mmx,@function
 _dv_rgbtoycb_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %eax
 	pushl   %ebx
 	pushl   %ecx
@@ -133,46 +134,47 @@ _dv_rgbtoycb_mmx:
 	pushl   %esi
 	pushl   %edi
 
-	leal    ZEROSX, %eax    #This section gets around a bug
+	LOAD_PIC_REG(bp)
+
+	leal    MUNG(ZEROSX), %eax    #This section gets around a bug
 	movq    (%eax), %mm0    #unlikely to persist
-	movq    %mm0, ZEROS
-	leal    OFFSETDX, %eax
+	movq    %mm0, MUNG(ZEROS)
+	leal    MUNG(OFFSETDX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, OFFSETD
-	leal    OFFSETWX, %eax
+	movq    %mm0, MUNG(OFFSETD)
+	leal    MUNG(OFFSETWX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, OFFSETW
-	leal    OFFSETBX, %eax
+	movq    %mm0, MUNG(OFFSETW)
+	leal    MUNG(OFFSETBX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, OFFSETB
-	leal    YR0GRX, %eax
+	movq    %mm0, MUNG(OFFSETB)
+	leal    MUNG(YR0GRX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, YR0GR
-	leal    YBG0BX, %eax
+	movq    %mm0, MUNG(YR0GR)
+	leal    MUNG(YBG0BX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, YBG0B
-	leal    UR0GRX, %eax
+	movq    %mm0, MUNG(YBG0B)
+	leal    MUNG(UR0GRX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, UR0GR
-	leal    UBG0BX, %eax
+	movq    %mm0, MUNG(UR0GR)
+	leal    MUNG(UBG0BX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, UBG0B
-	leal    VR0GRX, %eax
+	movq    %mm0, MUNG(UBG0B)
+	leal    MUNG(VR0GRX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, VR0GR
-	leal    VBG0BX, %eax
+	movq    %mm0, MUNG(VR0GR)
+	leal    MUNG(VBG0BX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, VBG0B
-	
-	movl    _rows(%ebp), %eax
-	movl    _columns(%ebp), %ebx
+	movq    %mm0, MUNG(VBG0B)
+	movl    _rows(%esp), %eax
+	movl    _columns(%esp), %ebx
 	mull    %ebx            #number pixels
 	shrl    $3, %eax        #number of loops
 	movl    %eax, %edi      #loop counter in edi
-	movl    _inPtr(%ebp), %eax
-	movl    _outyPtr(%ebp), %ebx
-	movl    _outuPtr(%ebp), %ecx
-	movl    _outvPtr(%ebp), %edx
+	movl    _inPtr(%esp), %eax
+	movl    _outyPtr(%esp), %ebx
+	movl    _outuPtr(%esp), %ecx
+	movl    _outvPtr(%esp), %edx
 rgbtoycb_mmx_loop: 
 	movq    (%eax), %mm1    #load G2R2B1G1R1B0G0R0
 	pxor    %mm6, %mm6      #0 -> mm6
@@ -186,29 +188,29 @@ rgbtoycb_mmx_loop: 
 	punpcklbw %mm6, %mm1     #B1G1R1B0 -> mm1
 	movq    %mm0, %mm2      #R1B0G0R0 -> mm2
 
-	pmaddwd YR0GR, %mm0     #yrR1,ygG0+yrR0 -> mm0
+	pmaddwd MUNG(YR0GR), %mm0     #yrR1,ygG0+yrR0 -> mm0
 	movq    %mm1, %mm3      #B1G1R1B0 -> mm3
 
-	pmaddwd YBG0B, %mm1     #ybB1+ygG1,ybB0 -> mm1
+	pmaddwd MUNG(YBG0B), %mm1     #ybB1+ygG1,ybB0 -> mm1
 	movq    %mm2, %mm4      #R1B0G0R0 -> mm4
 
-	pmaddwd UR0GR, %mm2     #urR1,ugG0+urR0 -> mm2
+	pmaddwd MUNG(UR0GR), %mm2     #urR1,ugG0+urR0 -> mm2
 	movq    %mm3, %mm5      #B1G1R1B0 -> mm5
 
-	pmaddwd UBG0B, %mm3     #ubB1+ugG1,ubB0 -> mm3
+	pmaddwd MUNG(UBG0B), %mm3     #ubB1+ugG1,ubB0 -> mm3
 	punpckhbw       %mm6, %mm7 #    00G2R2 -> mm7
 
-	pmaddwd VR0GR, %mm4     #vrR1,vgG0+vrR0 -> mm4
+	pmaddwd MUNG(VR0GR), %mm4     #vrR1,vgG0+vrR0 -> mm4
 	paddd   %mm1, %mm0      #Y1Y0 -> mm0
 
-	pmaddwd VBG0B, %mm5     #vbB1+vgG1,vbB0 -> mm5
+	pmaddwd MUNG(VBG0B), %mm5     #vbB1+vgG1,vbB0 -> mm5
 
 	movq    8(%eax), %mm1   #R5B4G4R4B3G3R3B2 -> mm1
 	paddd   %mm3, %mm2      #U1U0 -> mm2
 
 	movq    %mm1, %mm6      #R5B4G4R4B3G3R3B2 -> mm6
 
-	punpcklbw       ZEROS, %mm1     #B3G3R3B2 -> mm1
+	punpcklbw       MUNG(ZEROS), %mm1     #B3G3R3B2 -> mm1
 	paddd   %mm5, %mm4      #V1V0 -> mm4
 
 	movq    %mm1, %mm5      #B3G3R3B2 -> mm5
@@ -216,29 +218,29 @@ rgbtoycb_mmx_loop: 
 
 	paddd   %mm7, %mm1      #R3B200+00G2R2=R3B2G2R2->mm1
 
-	punpckhbw       ZEROS, %mm6     #R5B4G4R3 -> mm6
+	punpckhbw       MUNG(ZEROS), %mm6     #R5B4G4R3 -> mm6
 	movq    %mm1, %mm3      #R3B2G2R2 -> mm3
 
-	pmaddwd YR0GR, %mm1     #yrR3,ygG2+yrR2 -> mm1
+	pmaddwd MUNG(YR0GR), %mm1     #yrR3,ygG2+yrR2 -> mm1
70c669
 	movq    %mm5, %mm7      #B3G3R3B2 -> mm7
70c669
 
70c669
-	pmaddwd YBG0B, %mm5     #ybB3+ygG3,ybB2 -> mm5
70c669
+	pmaddwd MUNG(YBG0B), %mm5     #ybB3+ygG3,ybB2 -> mm5
70c669
 	psrad   $FIXPSHIFT, %mm0       #32-bit scaled Y1Y0 -> mm0
70c669
 
70c669
-	movq    %mm6, TEMP0     #R5B4G4R4 -> TEMP0
70c669
+	movq    %mm6, MUNG(TEMP0)     #R5B4G4R4 -> TEMP0
70c669
 	movq    %mm3, %mm6      #R3B2G2R2 -> mm6
70c669
-	pmaddwd UR0GR, %mm6     #urR3,ugG2+urR2 -> mm6
70c669
+	pmaddwd MUNG(UR0GR), %mm6     #urR3,ugG2+urR2 -> mm6
70c669
 	psrad   $FIXPSHIFT, %mm2       #32-bit scaled U1U0 -> mm2
70c669
 
70c669
 	paddd   %mm5, %mm1      #Y3Y2 -> mm1
70c669
 	movq    %mm7, %mm5      #B3G3R3B2 -> mm5
70c669
-	pmaddwd UBG0B, %mm7     #ubB3+ugG3,ubB2
70c669
+	pmaddwd MUNG(UBG0B), %mm7     #ubB3+ugG3,ubB2
70c669
 	psrad   $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
70c669
 
70c669
-	pmaddwd VR0GR, %mm3     #vrR3,vgG2+vgR2
70c669
+	pmaddwd MUNG(VR0GR), %mm3     #vrR3,vgG2+vgR2
70c669
 	packssdw        %mm1, %mm0      #Y3Y2Y1Y0 -> mm0
70c669
 
70c669
-	pmaddwd VBG0B, %mm5     #vbB3+vgG3,vbB2 -> mm5
70c669
+	pmaddwd MUNG(VBG0B), %mm5     #vbB3+vgG3,vbB2 -> mm5
70c669
 	psrad   $FIXPSHIFT, %mm4       #32-bit scaled V1V0 -> mm4
70c669
 
70c669
 	movq    16(%eax), %mm1  #B7G7R7B6G6R6B5G5 -> mm7
70c669
@@ -253,58 +255,58 @@ rgbtoycb_mmx_loop: 
70c669
 	movq    %mm7, %mm5      #R7B6G6R6B5G500 -> mm5
70c669
 	psrad   $FIXPSHIFT, %mm3       #32-bit scaled V3V2 -> mm3
70c669
 
70c669
-	paddw	OFFSETY, %mm0
70c669
+	paddw	MUNG(OFFSETY), %mm0
70c669
 	movq    %mm0, (%ebx)     #store Y3Y2Y1Y0 
70c669
 	packssdw %mm6, %mm2      #32-bit scaled U3U2U1U0 -> mm2
70c669
 
70c669
-	movq    TEMP0, %mm0     #R5B4G4R4 -> mm0
70c669
+	movq    MUNG(TEMP0), %mm0     #R5B4G4R4 -> mm0
70c669
 	addl	$8, %ebx
70c669
-	
70c669
-	punpcklbw       ZEROS, %mm7     #B5G500 -> mm7
70c669
+
70c669
+	punpcklbw       MUNG(ZEROS), %mm7     #B5G500 -> mm7
70c669
 	movq    %mm0, %mm6      #R5B4G4R4 -> mm6
70c669
 
70c669
-	movq    %mm2, TEMPU     #32-bit scaled U3U2U1U0 -> TEMPU
70c669
+	movq    %mm2, MUNG(TEMPU)     #32-bit scaled U3U2U1U0 -> TEMPU
70c669
 	psrlq   $32, %mm0       #00R5B4 -> mm0
70c669
 
70c669
 	paddw   %mm0, %mm7      #B5G5R5B4 -> mm7
70c669
 	movq    %mm6, %mm2      #B5B4G4R4 -> mm2
70c669
 
70c669
-	pmaddwd YR0GR, %mm2     #yrR5,ygG4+yrR4 -> mm2
70c669
+	pmaddwd MUNG(YR0GR), %mm2     #yrR5,ygG4+yrR4 -> mm2
70c669
 	movq    %mm7, %mm0      #B5G5R5B4 -> mm0
70c669
 
70c669
-	pmaddwd YBG0B, %mm7     #ybB5+ygG5,ybB4 -> mm7
70c669
+	pmaddwd MUNG(YBG0B), %mm7     #ybB5+ygG5,ybB4 -> mm7
70c669
 	packssdw        %mm3, %mm4      #32-bit scaled V3V2V1V0 -> mm4
70c669
 
70c669
 	addl    $24, %eax       #increment RGB count
70c669
 
70c669
-	movq    %mm4, TEMPV     #(V3V2V1V0)/256 -> mm4
70c669
+	movq    %mm4, MUNG(TEMPV)     #(V3V2V1V0)/256 -> mm4
70c669
 	movq    %mm6, %mm4      #B5B4G4R4 -> mm4
70c669
 
70c669
-	pmaddwd UR0GR, %mm6     #urR5,ugG4+urR4
70c669
+	pmaddwd MUNG(UR0GR), %mm6     #urR5,ugG4+urR4
70c669
 	movq    %mm0, %mm3      #B5G5R5B4 -> mm0
70c669
 
70c669
-	pmaddwd UBG0B, %mm0     #ubB5+ugG5,ubB4
70c669
+	pmaddwd MUNG(UBG0B), %mm0     #ubB5+ugG5,ubB4
70c669
 	paddd   %mm7, %mm2      #Y5Y4 -> mm2
70c669
 
70c669
-	pmaddwd         VR0GR, %mm4     #vrR5,vgG4+vrR4 -> mm4
70c669
+	pmaddwd         MUNG(VR0GR), %mm4     #vrR5,vgG4+vrR4 -> mm4
70c669
 	pxor    %mm7, %mm7      #0 -> mm7
70c669
 
70c669
-	pmaddwd VBG0B, %mm3     #vbB5+vgG5,vbB4 -> mm3
70c669
+	pmaddwd MUNG(VBG0B), %mm3     #vbB5+vgG5,vbB4 -> mm3
70c669
 	punpckhbw       %mm7, %mm1      #B7G7R7B6 -> mm1
70c669
 
70c669
 	paddd   %mm6, %mm0      #U5U4 -> mm0
70c669
 	movq    %mm1, %mm6      #B7G7R7B6 -> mm6
70c669
 
70c669
-	pmaddwd YBG0B, %mm6     #ybB7+ygG7,ybB6 -> mm6
70c669
+	pmaddwd MUNG(YBG0B), %mm6     #ybB7+ygG7,ybB6 -> mm6
70c669
 	punpckhbw       %mm7, %mm5      #R7B6G6R6 -> mm5
70c669
 
70c669
 	movq    %mm5, %mm7      #R7B6G6R6 -> mm7
70c669
 	paddd   %mm4, %mm3      #V5V4 -> mm3
70c669
 
70c669
-	pmaddwd YR0GR, %mm5     #yrR7,ygG6+yrR6 -> mm5
70c669
+	pmaddwd MUNG(YR0GR), %mm5     #yrR7,ygG6+yrR6 -> mm5
70c669
 	movq    %mm1, %mm4      #B7G7R7B6 -> mm4
70c669
 
70c669
-	pmaddwd UBG0B, %mm4     #ubB7+ugG7,ubB6 -> mm4
70c669
+	pmaddwd MUNG(UBG0B), %mm4     #ubB7+ugG7,ubB6 -> mm4
70c669
 	psrad   $FIXPSHIFT, %mm0       #32-bit scaled U5U4 -> mm0
70c669
 
70c669
 	psrad   $FIXPSHIFT, %mm2       #32-bit scaled Y5Y4 -> mm2
70c669
@@ -312,25 +314,25 @@ rgbtoycb_mmx_loop: 
70c669
 	paddd   %mm5, %mm6      #Y7Y6 -> mm6
70c669
 	movq    %mm7, %mm5      #R7B6G6R6 -> mm5
70c669
 
70c669
-	pmaddwd UR0GR, %mm7     #urR7,ugG6+ugR6 -> mm7
70c669
+	pmaddwd MUNG(UR0GR), %mm7     #urR7,ugG6+ugR6 -> mm7
70c669
 	psrad   $FIXPSHIFT, %mm3       #32-bit scaled V5V4 -> mm3
70c669
 
70c669
-	pmaddwd VBG0B, %mm1     #vbB7+vgG7,vbB6 -> mm1
70c669
+	pmaddwd MUNG(VBG0B), %mm1     #vbB7+vgG7,vbB6 -> mm1
70c669
 	psrad   $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
70c669
 
70c669
 	packssdw %mm6, %mm2     #Y7Y6Y5Y4 -> mm2
70c669
 
70c669
-	pmaddwd VR0GR, %mm5     #vrR7,vgG6+vrR6 -> mm5
70c669
+	pmaddwd MUNG(VR0GR), %mm5     #vrR7,vgG6+vrR6 -> mm5
70c669
 	paddd   %mm4, %mm7      #U7U6 -> mm7    
70c669
 
70c669
 	psrad   $FIXPSHIFT, %mm7       #32-bit scaled U7U6 -> mm7
70c669
-	paddw	OFFSETY, %mm2
70c669
+	paddw	MUNG(OFFSETY), %mm2
70c669
 	movq	%mm2, (%ebx)    #store Y7Y6Y5Y4 
70c669
 
70c669
-	movq	ALLONE, %mm6
70c669
+	movq	MUNG(ALLONE), %mm6
70c669
 	packssdw %mm7, %mm0     #32-bit scaled U7U6U5U4 -> mm0
70c669
 
70c669
-	movq    TEMPU, %mm4     #32-bit scaled U3U2U1U0 -> mm4
70c669
+	movq    MUNG(TEMPU), %mm4     #32-bit scaled U3U2U1U0 -> mm4
70c669
 	pmaddwd	%mm6, %mm0      #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
70c669
 	
70c669
 	pmaddwd	%mm6, %mm4      #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
70c669
@@ -340,8 +342,8 @@ rgbtoycb_mmx_loop: 
70c669
 
70c669
 	psrad   $FIXPSHIFT, %mm1       #32-bit scaled V7V6 -> mm1
70c669
 	psraw	$1, %mm4 	#divide UU3 UU2 UU1 UU0 by 2 -> mm4
70c669
-		
70c669
-	movq    TEMPV, %mm5     #32-bit scaled V3V2V1V0 -> mm5
70c669
+
70c669
+	movq    MUNG(TEMPV), %mm5     #32-bit scaled V3V2V1V0 -> mm5
70c669
 
70c669
 	movq	%mm4, (%ecx)    # store U	
70c669
 
70c669
@@ -429,14 +433,15 @@ _dv_ppm_copy_y_block_mmx:
70c669
 _dv_pgm_copy_y_block_mmx:
70c669
 
70c669
 	pushl   %ebp
70c669
-	movl    %esp, %ebp
70c669
 	pushl   %esi
70c669
 	pushl	%edi
70c669
-	
70c669
-	movl    8(%ebp), %edi          # dest
70c669
-	movl    12(%ebp), %esi         # src
70c669
 
70c669
-	movq	OFFSETY, %mm7
70c669
+	LOAD_PIC_REG(bp)
70c669
+
70c669
+	movl    16(%esp), %edi          # dest
70c669
+	movl    20(%esp), %esi         # src
70c669
+
70c669
+	movq	MUNG(OFFSETY), %mm7
70c669
 	pxor	%mm6, %mm6
70c669
 	
70c669
 	movq	(%esi), %mm0
70c669
@@ -571,14 +578,15 @@ _dv_pgm_copy_y_block_mmx:
70c669
 _dv_video_copy_y_block_mmx:
70c669
 
70c669
 	pushl   %ebp
70c669
-	movl    %esp, %ebp
70c669
 	pushl   %esi
70c669
 	pushl	%edi
70c669
-	
70c669
-	movl    8(%ebp), %edi          # dest
70c669
-	movl    12(%ebp), %esi         # src
70c669
 
70c669
-	movq	OFFSETBX, %mm7
70c669
+	LOAD_PIC_REG(bp)
70c669
+
70c669
+	movl    16(%esp), %edi          # dest
70c669
+	movl    20(%esp), %esi         # src
70c669
+
70c669
+	movq	MUNG(OFFSETBX), %mm7
70c669
 	pxor	%mm6, %mm6
70c669
 	
70c669
 	movq	(%esi), %mm0
70c669
@@ -859,16 +871,16 @@ _dv_ppm_copy_pal_c_block_mmx:
70c669
 _dv_pgm_copy_pal_c_block_mmx:
70c669
 				
70c669
 	pushl   %ebp
70c669
-	movl    %esp, %ebp
70c669
 	pushl   %esi
70c669
 	pushl	%edi
70c669
 	pushl	%ebx
70c669
-	
70c669
-	movl    8(%ebp), %edi          # dest
70c669
-	movl    12(%ebp), %esi         # src
70c669
 
70c669
+	LOAD_PIC_REG(bp)
70c669
+
70c669
+	movl    20(%esp), %edi          # dest
70c669
+	movl    24(%esp), %esi         # src
70c669
 
70c669
-	movq	OFFSETBX, %mm7
70c669
+	movq	MUNG(OFFSETBX), %mm7
70c669
 	pxor	%mm6, %mm6
70c669
 
70c669
 	
70c669
@@ -1007,15 +1021,16 @@ _dv_pgm_copy_pal_c_block_mmx:
70c669
 _dv_video_copy_pal_c_block_mmx:
70c669
 				
70c669
 	pushl   %ebp
70c669
-	movl    %esp, %ebp
70c669
 	pushl   %esi
70c669
 	pushl	%edi
70c669
 	pushl	%ebx
70c669
-	
70c669
-	movl    8(%ebp), %edi          # dest
70c669
-	movl    12(%ebp), %esi         # src
70c669
 
70c669
-	movq	OFFSETBX, %mm7
70c669
+	LOAD_PIC_REG(bp)
70c669
+
70c669
+	movl    20(%esp), %edi          # dest
70c669
+	movl    24(%esp), %esi         # src
70c669
+
70c669
+	movq	MUNG(OFFSETBX), %mm7
70c669
 	paddw	%mm7, %mm7
70c669
 	pxor	%mm6, %mm6
70c669
 
70c669
@@ -1102,18 +1119,18 @@ video_copy_pal_c_block_mmx_loop:	
70c669
 _dv_ppm_copy_ntsc_c_block_mmx:
70c669
 				
70c669
 	pushl   %ebp
70c669
-	movl    %esp, %ebp
70c669
 	pushl   %esi
70c669
 	pushl	%edi
70c669
 	pushl	%ebx
70c669
-	
70c669
-	movl    8(%ebp), %edi          # dest
70c669
-	movl    12(%ebp), %esi         # src
70c669
+
70c669
+	LOAD_PIC_REG(bp)
70c669
+
70c669
+	movl    20(%esp), %edi          # dest
70c669
+	movl    24(%esp), %esi         # src
70c669
 
70c669
 	movl	$4, %ebx	
70c669
 
70c669
-	movq	ALLONE, %mm6
70c669
-	
70c669
+	movq	MUNG(ALLONE), %mm6
70c669
 ppm_copy_ntsc_c_block_mmx_loop:	
70c669
 	
70c669
 	movq	(%esi), %mm0
70c669
@@ -1175,14 +1194,15 @@ ppm_copy_ntsc_c_block_mmx_loop:	
70c669
 _dv_pgm_copy_ntsc_c_block_mmx:
70c669
 				
70c669
 	pushl   %ebp
70c669
-	movl    %esp, %ebp
70c669
 	pushl   %esi
70c669
 	pushl	%edi
70c669
-	
70c669
-	movl    8(%ebp), %edi          # dest
70c669
-	movl    12(%ebp), %esi         # src
70c669
 
70c669
-	movq	OFFSETBX, %mm7
70c669
+	LOAD_PIC_REG(bp)
70c669
+
70c669
+	movl    16(%esp), %edi          # dest
70c669
+	movl    20(%esp), %esi         # src
70c669
+
70c669
+	movq	MUNG(OFFSETBX), %mm7
70c669
 	paddw	%mm7, %mm7
70c669
 	pxor	%mm6, %mm6
70c669
 
70c669
@@ -1332,15 +1354,16 @@ _dv_pgm_copy_ntsc_c_block_mmx:
70c669
 _dv_video_copy_ntsc_c_block_mmx:
70c669
 				
70c669
 	pushl   %ebp
70c669
-	movl    %esp, %ebp
70c669
 	pushl   %esi
70c669
 	pushl	%edi
70c669
 	pushl	%ebx
70c669
-	
70c669
-	movl    8(%ebp), %edi          # dest
70c669
-	movl    12(%ebp), %esi         # src
70c669
 
70c669
-	movq	OFFSETBX, %mm7
70c669
+	LOAD_PIC_REG(bp)
70c669
+
70c669
+	movl    20(%esp), %edi          # dest
70c669
+	movl    24(%esp), %esi         # src
70c669
+
70c669
+	movq	MUNG(OFFSETBX), %mm7
70c669
 	paddw	%mm7, %mm7
70c669
 	pxor	%mm6, %mm6
70c669
 
70c669
--- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S
70c669
+++ libdv-0.104/libdv/rgbtoyuv_x86_64.S
70c669
@@ -41,9 +41,6 @@
70c669
 #define DV_WIDTH_SHORT_HALF 720
70c669
 #define DV_WIDTH_BYTE_HALF  360	
70c669
 		
70c669
-.global _dv_rgbtoycb_mmx_x86_64
70c669
-# .global yuvtoycb_mmx_x86_64
70c669
-
70c669
 .data
70c669
 
70c669
 .align 8
70c669
--- libdv-0.104-old/libdv/vlc_x86.S
70c669
+++ libdv-0.104/libdv/vlc_x86.S
70c669
@@ -1,31 +1,39 @@
70c669
 	#include "asmoff.h"
70c669
 .section .note.GNU-stack, "", @progbits
70c669
+	#include "asm_common.S"
70c669
 
70c669
 .text
70c669
 	.align 4
70c669
 .globl dv_decode_vlc 
70c669
+.globl asm_dv_decode_vlc 
70c669
+.hidden asm_dv_decode_vlc
70c669
+asm_dv_decode_vlc = dv_decode_vlc
70c669
+
70c669
 	.type	 dv_decode_vlc,@function
70c669
 dv_decode_vlc:
70c669
 	pushl %ebx
70c669
+	pushl %ebp
70c669
+
70c669
+	LOAD_PIC_REG(bp)
70c669
 
70c669
-	/* Args are at 8(%esp). */
70c669
-	movl  8(%esp),%eax		/* %eax is bits */
70c669
-	movl  12(%esp),%ebx		/* %ebx is maxbits */
70c669
+	/* Args are at 12(%esp). */
70c669
+	movl  12(%esp),%eax		/* %eax is bits */
70c669
+	movl  16(%esp),%ebx		/* %ebx is maxbits */
70c669
 	andl  $0x3f,%ebx		/* limit index range STL*/
70c669
 
70c669
-	movl  dv_vlc_class_index_mask(,%ebx,4),%edx
70c669
+	movl  MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx
70c669
 	andl  %eax,%edx
70c669
-	movl  dv_vlc_class_index_rshift(,%ebx,4),%ecx
70c669
+	movl  MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx
70c669
 	sarl  %cl,%edx
70c669
-	movl  dv_vlc_classes(,%ebx,4),%ecx
70c669
+	movl  MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx
70c669
 	movsbl  (%ecx,%edx,1),%edx	/* %edx is class */
70c669
 			
70c669
-	movl  dv_vlc_index_mask(,%edx,4),%ebx
70c669
-	movl  dv_vlc_index_rshift(,%edx,4),%ecx
70c669
+	movl  MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
70c669
+	movl  MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
70c669
 	andl  %eax,%ebx
70c669
 	sarl  %cl,%ebx
70c669
 
70c669
-	movl  dv_vlc_lookups(,%edx,4),%edx
70c669
+	movl  MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
70c669
 	movl  (%edx,%ebx,4),%edx
70c669
 
70c669
 	/* Now %edx holds result, like this:
70c669
@@ -43,7 +52,7 @@ dv_decode_vlc:
70c669
 	movl  %edx,%ecx
70c669
 	sarl  $8,%ecx
70c669
 	andl  $0xff,%ecx
70c669
-	movl  sign_mask(,%ecx,4),%ebx
70c669
+	movl  MUNG_ARR(sign_mask,%ecx,4),%ebx
70c669
 	andl  %ebx,%eax
70c669
 	negl  %eax
70c669
 	sarl  $31,%eax
70c669
@@ -64,14 +73,14 @@ dv_decode_vlc:
70c669
 	    *result = broken;
70c669
 	Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
70c669
 	*/
70c669
-	movl  12(%esp),%ebx		/* %ebx is maxbits */
70c669
+	movl  16(%esp),%ebx		/* %ebx is maxbits */
70c669
 	subl  %ecx,%ebx
70c669
 	sbbl  %ebx,%ebx
70c669
 	orl   %ebx,%edx
70c669
 
70c669
-	movl  16(%esp),%eax
70c669
+	movl  20(%esp),%eax
70c669
 	movl  %edx,(%eax)
70c669
-	
70c669
+	popl  %ebp
70c669
 	popl  %ebx
70c669
 	ret
70c669
 	
70c669
@@ -81,21 +90,28 @@ dv_decode_vlc:
70c669
 	.type	 __dv_decode_vlc,@function
70c669
 __dv_decode_vlc:
70c669
 	pushl %ebx
70c669
+	pushl %ebp
70c669
+
70c669
+	LOAD_PIC_REG(bp)
70c669
 
70c669
-	/* Args are at 8(%esp). */
70c669
-	movl  8(%esp),%eax		/* %eax is bits */
70c669
+	/* Args are at 12(%esp). */
70c669
+	movl  12(%esp),%eax		/* %eax is bits */
70c669
 	
70c669
 	movl  %eax,%edx			/* %edx is class */
70c669
 	andl  $0xfe00,%edx
70c669
 	sarl  $9,%edx
70c669
+#ifdef __PIC__
70c669
+	movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
70c669
+#else
70c669
 	movsbl dv_vlc_class_lookup5(%edx),%edx
70c669
-	
70c669
-	movl  dv_vlc_index_mask(,%edx,4),%ebx
70c669
-	movl  dv_vlc_index_rshift(,%edx,4),%ecx
70c669
+#endif
70c669
+
70c669
+	movl  MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
70c669
+	movl  MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
70c669
 	andl  %eax,%ebx
70c669
 	sarl  %cl,%ebx
70c669
 
70c669
-	movl  dv_vlc_lookups(,%edx,4),%edx
70c669
+	movl  MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
70c669
 	movl  (%edx,%ebx,4),%edx
70c669
 
70c669
 	/* Now %edx holds result, like this:
70c669
@@ -113,7 +129,7 @@ __dv_decode_vlc:
70c669
 	movl  %edx,%ecx
70c669
 	sarl  $8,%ecx
70c669
 	andl  $0xff,%ecx
70c669
-	movl  sign_mask(,%ecx,4),%ecx
70c669
+	movl  MUNG_ARR(sign_mask,%ecx,4),%ecx
70c669
 	andl  %ecx,%eax
70c669
 	negl  %eax
70c669
 	sarl  $31,%eax
70c669
@@ -128,9 +144,9 @@ __dv_decode_vlc:
70c669
 	xorl  %eax,%edx
70c669
 	subl  %eax,%edx
70c669
 
70c669
-	movl  12(%esp),%eax
70c669
+	movl  16(%esp),%eax
70c669
 	movl  %edx,(%eax)
70c669
-	
70c669
+	popl  %ebp
70c669
 	popl  %ebx
70c669
 	ret
70c669
 
70c669
@@ -141,14 +157,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_
70c669
 */
70c669
 .text
70c669
 	.align	4
70c669
+.globl asm_dv_parse_ac_coeffs_pass0
70c669
+.hidden asm_dv_parse_ac_coeffs_pass0
70c669
+	asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0
70c669
+
70c669
 .globl	dv_parse_ac_coeffs_pass0
70c669
 .type	dv_parse_ac_coeffs_pass0,@function
70c669
 dv_parse_ac_coeffs_pass0:
70c669
 	pushl	%ebx
70c669
 	pushl	%edi
70c669
 	pushl	%esi
70c669
 	pushl	%ebp
70c669
 
70c669
+	LOAD_PIC_REG(si)
70c669
+
70c669
 #define ARGn(N)  (20+(4*(N)))(%esp)
70c669
 
70c669
 	/*
70c669
@@ -160,8 +183,10 @@ dv_parse_ac_coeffs_pass0:
70c669
 	ebp	bl
70c669
 	*/
70c669
 	movl    ARGn(2),%ebp
70c669
+#ifndef __PIC__
70c669
 	movl	ARGn(0),%esi
70c669
 	movl	bitstream_t_buf(%esi),%esi
70c669
+#endif
70c669
 	movl	dv_block_t_offset(%ebp),%edi
70c669
 	movl	dv_block_t_reorder(%ebp),%ebx
70c669
 
70c669
@@ -171,7 +196,11 @@ dv_parse_ac_coeffs_pass0:
70c669
 	
70c669
 	movq    dv_block_t_coeffs(%ebp),%mm1
70c669
 	pxor    %mm0,%mm0
70c669
+#ifdef __PIC__
70c669
+	pand    const_f_0_0_0@GOTOFF(%esi),%mm1
70c669
+#else
70c669
 	pand    const_f_0_0_0,%mm1
70c669
+#endif
70c669
 	movq    %mm1,dv_block_t_coeffs(%ebp)
70c669
 	movq    %mm0,(dv_block_t_coeffs + 8)(%ebp)
70c669
 	movq    %mm0,(dv_block_t_coeffs + 16)(%ebp)
70c669
@@ -192,9 +221,17 @@ dv_parse_ac_coeffs_pass0:
70c669
 readloop:
70c669
 	movl	%edi,%ecx
70c669
 	shrl	$3,%ecx
70c669
+#ifdef __PIC__
70c669
+	movl    ARGn(0),%eax
70c669
+	addl    bitstream_t_buf(%eax),%ecx
70c669
+	movzbl  (%ecx),%eax
70c669
+	movzbl  1(%ecx),%edx
70c669
+	movzbl  2(%ecx),%ecx
70c669
+#else
70c669
 	movzbl  (%esi,%ecx,1),%eax
70c669
 	movzbl  1(%esi,%ecx,1),%edx
70c669
 	movzbl  2(%esi,%ecx,1),%ecx
70c669
+#endif
70c669
 	shll	$16,%eax
70c669
 	shll	$8,%edx
70c669
 	orl	%ecx,%eax
70c669
@@ -218,7 +255,11 @@ readloop:
70c669
 
70c669
 	/* Attempt to use the shortcut first.  If it hits, then
70c669
 	   this vlc term has been decoded. */
70c669
+#ifdef __PIC__
70c669
+	movl	dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
70c669
+#else
70c669
 	movl	dv_vlc_class1_shortcut(,%ecx,4),%edx
70c669
+#endif
70c669
 	test	$0x80,%edx
70c669
 	je	done_decode
70c669
 
70c669
@@ -229,12 +270,19 @@ readloop:
70c669
 	movl	%ebx,dv_block_t_reorder(%ebp)
70c669
 
70c669
 	/* %eax is bits */
70c669
-	
70c669
+#ifdef __PIC__
70c669
+	movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
70c669
+
70c669
+	movl  dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
70c669
+	movl  dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
70c669
+	movl  dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
70c669
+#else
70c669
 	movsbl dv_vlc_class_lookup5(%ecx),%ecx
70c669
 
70c669
 	movl  dv_vlc_index_mask(,%ecx,4),%ebx
70c669
 	movl  dv_vlc_lookups(,%ecx,4),%edx
70c669
 	movl  dv_vlc_index_rshift(,%ecx,4),%ecx
70c669
+#endif
70c669
 	andl  %eax,%ebx
70c669
 	sarl  %cl,%ebx
70c669
 
70c669
@@ -257,7 +305,11 @@ readloop:
70c669
 	movl  %edx,%ecx
70c669
 	sarl  $8,%ecx
70c669
 	andl  $0xff,%ecx
70c669
+#ifdef __PIC__
70c669
+	movl  sign_mask@GOTOFF(%esi,%ecx,4),%ecx
70c669
+#else
70c669
 	movl  sign_mask(,%ecx,4),%ecx
70c669
+#endif
70c669
 	andl  %ecx,%eax
70c669
 	negl  %eax
70c669
 	sarl  $31,%eax
70c669
@@ -327,10 +379,16 @@ alldone:
70c669
 
70c669
 slowpath:
70c669
 	/* slow path:	 use dv_decode_vlc */;
70c669
+#ifdef __PIC__
70c669
+	pushl	%esi
70c669
+	leal	vlc@GOTOFF(%esi),%esi
70c669
+	xchgl	%esi,(%esp)	/* last parameter is &vlc */
70c669
+#else
70c669
 	pushl	$vlc		/* last parameter is &vlc */
70c669
+#endif
70c669
 	pushl	%edx		/* bits_left */
70c669
 	pushl	%eax		/* bits */
70c669
-	call	dv_decode_vlc
70c669
+	call	asm_dv_decode_vlc
70c669
 	addl	$12,%esp
70c669
 	test	$0x80,%edx	/* If (vlc.run < 0) break */
70c669
 	jne	escape
70c669
@@ -367,6 +425,8 @@ show16:
70c669
 	pushl	%esi
70c669
 	pushl	%ebp
70c669
 
70c669
+	LOAD_PIC_REG(si)
70c669
+
70c669
 #define ARGn(N)  (20+(4*(N)))(%esp)
70c669
 
70c669
 	movl	ARGn(1),%eax			/* quality */
70c669
@@ -374,7 +435,11 @@ dv_parse_video_segment:
70c669
 	jz	its_mono
70c669
 	movl	$6,%ebx
70c669
 its_mono:
70c669
+#ifdef __PIC__
70c669
+	movl	%ebx,n_blocks@GOTOFF(%esi)
70c669
+#else
70c669
 	movl	%ebx,n_blocks
70c669
+#endif
70c669
 	
70c669
 	/*
70c669
 	 *	ebx	seg/b
70c669
@@ -385,15 +450,22 @@ its_mono:
70c669
 	 *	ebp	bl
70c669
 	 */
70c669
 	movl	ARGn(0),%ebx
70c669
+#ifndef __PIC__
70c669
 	movl	dv_videosegment_t_bs(%ebx),%esi
70c669
 	movl	bitstream_t_buf(%esi),%esi
70c669
+#endif
70c669
 	leal	dv_videosegment_t_mb(%ebx),%edi
70c669
 
70c669
 	movl	$0,%eax
70c669
 	movl	$0,%ecx
70c669
 macloop:
70c669
+#ifdef __PIC__
70c669
+	movl	%eax,m@GOTOFF(%esi)
70c669
+	movl	%ecx,mb_start@GOTOFF(%esi)
70c669
+#else
70c669
 	movl	%eax,m
70c669
 	movl	%ecx,mb_start
70c669
+#endif
70c669
 
70c669
 	movl	ARGn(0),%ebx
70c669
 	
70c669
@@ -401,7 +473,13 @@ macloop:
70c669
 	/* mb->qno = bitstream_get(bs,4); */
70c669
 	movl	%ecx,%edx
70c669
 	shr	$3,%edx
70c669
+#ifdef __PIC__
70c669
+	movl	dv_videosegment_t_bs(%ebx),%ecx
70c669
+	movl	bitstream_t_buf(%ecx),%ecx
70c669
+	movzbl	3(%ecx,%edx,1),%edx
70c669
+#else
70c669
 	movzbl	3(%esi,%edx,1),%edx
70c669
+#endif
70c669
 	andl	$0xf,%edx
70c669
 	movl	%edx,dv_macroblock_t_qno(%edi)
70c669
 
70c669
@@ -412,7 +490,11 @@ macloop:
70c669
 	movl	%edx,dv_macroblock_t_eob_count(%edi)
70c669
 
70c669
 	/* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
70c669
+#ifdef __PIC__
70c669
+	movl	dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
70c669
+#else
70c669
 	movl	dv_super_map_vertical(,%eax,4),%edx
70c669
+#endif
70c669
 	movl	dv_videosegment_t_i(%ebx),%ecx
70c669
 	addl	%ecx,%edx
70c669
 
70c669
@@ -423,11 +505,20 @@ skarly:	
70c669
 	andl	$1,%ecx
70c669
 	shll	$5,%ecx		/* ecx = (isPAL ? 32 : 0) */
70c669
 
70c669
+#ifdef __PIC__
70c669
+	leal	mod_10@GOTOFF(%esi),%edx
70c669
+	movzbl	(%edx,%ecx,1),%edx	/* uses mod_12 for PAL */
70c669
+#else
70c669
 	movzbl	mod_10(%edx,%ecx,1),%edx	/* uses mod_12 for PAL */
70c669
+#endif
70c669
 	movl	%edx,dv_macroblock_t_i(%edi)
70c669
 
70c669
 	/*  mb->j = dv_super_map_horizontal[m]; */	
70c669
+#ifdef __PIC__
70c669
+	movl	dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
70c669
+#else
70c669
 	movl	dv_super_map_horizontal(,%eax,4),%edx
70c669
+#endif
70c669
 	movl	%edx,dv_macroblock_t_j(%edi)
70c669
 
70c669
 	/* mb->k = seg->k; */
70c669
@@ -446,12 +537,28 @@ blkloop:
70c669
 	        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
70c669
 	*/
70c669
 	/* dc = bitstream_get(bs,9); */
70c669
+#ifdef __PIC__
70c669
+	movl	mb_start@GOTOFF(%esi),%ecx
70c669
+#else
70c669
 	movl	mb_start,%ecx
70c669
+#endif
70c669
 	shr	$3,%ecx
70c669
+#ifdef __PIC__
70c669
+	movzbl	blk_start@GOTOFF(%esi,%ebx),%edx
70c669
+#else
70c669
 	movzbl	blk_start(%ebx),%edx
70c669
+#endif
70c669
 	addl	%ecx,%edx
70c669
+#ifdef __PIC__
70c669
+	movl	ARGn(0),%ecx
70c669
+	movl	dv_videosegment_t_bs(%ecx),%ecx
70c669
+	movl	bitstream_t_buf(%ecx),%ecx
70c669
+	movzbl	(%ecx,%edx,1),%eax	/* hi byte */
70c669
+	movzbl	1(%ecx,%edx,1),%ecx	/* lo byte */
70c669
+#else
70c669
 	movzbl	(%esi,%edx,1),%eax	/* hi byte */
70c669
 	movzbl	1(%esi,%edx,1),%ecx	/* lo byte */
70c669
+#endif
70c669
 	shll	$8,%eax
70c669
 	orl	%ecx,%eax
70c669
 
70c669
@@ -478,7 +585,11 @@ blkloop:
70c669
 
70c669
 	/* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
70c669
 	shll	$6,%eax
70c669
+#ifdef __PIC__
70c669
+	leal	dv_reorder@GOTOFF+1(%esi,%eax),%eax
70c669
+#else
70c669
 	addl	$(dv_reorder+1),%eax
70c669
+#endif
70c669
 	movl	%eax,dv_block_t_reorder(%ebp)
70c669
 
70c669
 	/* bl->reorder_sentinel = bl->reorder + 63; */
70c669
@@ -486,13 +597,22 @@ blkloop:
70c669
 	movl	%eax,dv_block_t_reorder_sentinel(%ebp)
70c669
 
70c669
 	/* bl->offset= mb_start + dv_parse_bit_start[b]; */
70c669
+#ifdef __PIC__
70c669
+	movl	mb_start@GOTOFF(%esi),%ecx
70c669
+	movl	dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
70c669
+#else
70c669
 	movl	mb_start,%ecx
70c669
 	movl	dv_parse_bit_start(,%ebx,4),%eax
70c669
+#endif
70c669
 	addl	%ecx,%eax
70c669
 	movl	%eax,dv_block_t_offset(%ebp)
70c669
 
70c669
 	/* bl->end= mb_start + dv_parse_bit_end[b]; */
70c669
+#ifdef __PIC__
70c669
+	movl	dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
70c669
+#else
70c669
 	movl	dv_parse_bit_end(,%ebx,4),%eax
70c669
+#endif
70c669
 	addl	%ecx,%eax
70c669
 	movl	%eax,dv_block_t_end(%ebp)
70c669
 
70c669
@@ -504,7 +624,11 @@ blkloop:
70c669
 	/* no AC pass.  Just zero out the remaining coeffs */
70c669
 	movq    dv_block_t_coeffs(%ebp),%mm1
70c669
 	pxor    %mm0,%mm0
70c669
+#ifdef __PIC__
70c669
+	pand    const_f_0_0_0@GOTOFF(%esi),%mm1
70c669
+#else
70c669
 	pand    const_f_0_0_0,%mm1
70c669
+#endif
70c669
 	movq    %mm1,dv_block_t_coeffs(%ebp)
70c669
 	movq    %mm0,(dv_block_t_coeffs + 8)(%ebp)
70c669
 	movq    %mm0,(dv_block_t_coeffs + 16)(%ebp)
70c669
@@ -529,18 +653,27 @@ do_ac_pass:
70c669
 	pushl	%ebp
70c669
 	pushl	%edi
70c669
 	pushl	%eax
70c669
-	call	dv_parse_ac_coeffs_pass0
70c669
+	call	asm_dv_parse_ac_coeffs_pass0
70c669
 	addl	$12,%esp
70c669
 done_ac:
70c669
 
70c669
+#ifdef __PIC__
70c669
+	movl	n_blocks@GOTOFF(%esi),%eax
70c669
+#else
70c669
 	movl	n_blocks,%eax
70c669
+#endif
70c669
 	addl	$dv_block_t_size,%ebp
70c669
 	incl	%ebx
70c669
 	cmpl	%eax,%ebx
70c669
 	jnz	blkloop
70c669
 
70c669
+#ifdef __PIC__
70c669
+	movl	m@GOTOFF(%esi),%eax
70c669
+	movl	mb_start@GOTOFF(%esi),%ecx
70c669
+#else
70c669
 	movl	m,%eax
70c669
 	movl	mb_start,%ecx
70c669
+#endif
70c669
 	addl	$(8 * 80),%ecx
70c669
 	addl	$dv_macroblock_t_size,%edi
70c669
 	incl	%eax
70c669
@@ -558,7 +691,7 @@ done_ac:
70c669
 
70c669
 	andl	$DV_QUALITY_AC_MASK,%eax
70c669
 	cmpl	$DV_QUALITY_AC_2,%eax
70c669
-	jz	dv_parse_ac_coeffs
70c669
+	jz	asm_dv_parse_ac_coeffs
70c669
 	movl	$0,%eax
70c669
 	ret
70c669