Patch downloaded from
http://bugs.gentoo.org/show_bug.cgi?id=121871
http://bugs.gentoo.org/attachment.cgi?id=98094

The patch makes the 32-bit x86 MMX assembly in libdv-0.104 position-independent, so shared builds carry no text relocations (TEXTRELs). Four techniques recur below: data symbols are reached %ebp-relative through @GOTOFF (the MUNG macros), small literal constants are built on the stack from immediates, lookup tables are passed in from C as extra function arguments, and intra-library calls go through hidden aliases that bind locally.

--- libdv-0.104-old/libdv/asm_common.S
+++ libdv-0.104/libdv/asm_common.S
@@ -0,0 +1,29 @@
+/* public domain, do what you want */
+
+#ifdef __PIC__
+# define MUNG(sym)                 sym##@GOTOFF(%ebp)
+# define MUNG_ARR(sym, args...)    sym##@GOTOFF(%ebp,##args)
+#else
+# define MUNG(sym)                 sym
+# define MUNG_ARR(sym, args...)    sym(,##args)
+#endif
+
+#ifdef __PIC__
+# undef __i686 /* gcc define gets in our way */
+# define LOAD_PIC_REG(reg) \
+	.ifndef  __i686.get_pc_thunk.reg; \
+	.section .gnu.linkonce.t.__i686.get_pc_thunk.reg,"ax",@progbits; \
+	.global  __i686.get_pc_thunk.reg; \
+	.hidden  __i686.get_pc_thunk.reg; \
+	.type    __i686.get_pc_thunk.reg,@function; \
+	__i686.get_pc_thunk.reg: \
+	movl (%esp), %e##reg; \
+	ret; \
+	.size __i686.get_pc_thunk.reg,.-__i686.get_pc_thunk.reg; \
+	.previous; \
+	.endif; \
+	call __i686.get_pc_thunk.reg; \
+	addl $_GLOBAL_OFFSET_TABLE_, %e##reg
+#else
+# define LOAD_PIC_REG(reg)
+#endif
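
As a reader's aid, here is a minimal standalone sketch (a hypothetical pic_demo.S, not part of the patch; the names demo_word and demo_load are invented) of the pattern LOAD_PIC_REG and MUNG generate together: the thunk copies the return address into the register, the R_386_GOTPC add turns that into a pointer to the GOT, and data is then addressed @GOTOFF so .text needs no relocations. It should assemble with `gcc -m32 -c pic_demo.S`.

	.data
demo_word:	.long 42

	.section .note.GNU-stack, "", @progbits

	.text
	.globl	demo_load
	.type	demo_load, @function
demo_load:				/* int demo_load(void) -- returns 42 */
	pushl	%ebp
	call	.Lget_pc_thunk_bp	/* %ebp <- address of the addl below */
	addl	$_GLOBAL_OFFSET_TABLE_, %ebp	/* %ebp <- &GOT (R_386_GOTPC) */
	movl	demo_word@GOTOFF(%ebp), %eax	/* PIC data load, no TEXTREL */
	popl	%ebp
	ret
	.size	demo_load, .-demo_load

.Lget_pc_thunk_bp:
	movl	(%esp), %ebp		/* return address -> %ebp */
	ret
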
--- libdv-0.104-old/libdv/dct_block_mmx.S
+++ libdv-0.104/libdv/dct_block_mmx.S
@@ -53,19 +53,22 @@ scratch2:       .quad 0
 
 .section .note.GNU-stack, "", @progbits
 
+#include "asm_common.S"
+
 .text
 
 .align 8	
 .global _dv_dct_88_block_mmx
 .hidden _dv_dct_88_block_mmx
 .type   _dv_dct_88_block_mmx,@function
 _dv_dct_88_block_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 
-	movl    8(%ebp), %esi          # source
+	LOAD_PIC_REG(bp)
+
+	movl    12(%esp), %esi          # source
 
 # column 0
 	movq 16*0(%esi), %mm0          # v0
@@ -86,22 +91,22 @@ _dv_dct_88_block_mmx:
 
 	movq 16*3(%esi), %mm5          # v3
 	movq 16*4(%esi), %mm7          # v4
-	movq  %mm7, scratch1           # scratch1: v4   ; 
+	movq  %mm7, MUNG(scratch1)     # scratch1: v4   ; 
 	movq  %mm5, %mm7               # duplicate v3 
-	paddw scratch1, %mm5           # v03: v3+v4  
-	psubw scratch1, %mm7           # v04: v3-v4  
-	movq  %mm5, scratch2           # scratch2: v03
+	paddw MUNG(scratch1), %mm5     # v03: v3+v4  
+	psubw MUNG(scratch1), %mm7     # v04: v3-v4  
+	movq  %mm5, MUNG(scratch2)     # scratch2: v03
 	movq  %mm0, %mm5               # mm5: v00
 
-	paddw scratch2, %mm0           # v10: v00+v03   
-	psubw scratch2, %mm5           # v13: v00-v03   
-	movq  %mm3, scratch3           # scratch3: v02
+	paddw MUNG(scratch2), %mm0     # v10: v00+v03   
+	psubw MUNG(scratch2), %mm5     # v13: v00-v03   
+	movq  %mm3, MUNG(scratch3)     # scratch3: v02
 	movq  %mm1, %mm3               # duplicate v01
 
-	paddw scratch3, %mm1          # v11: v01+v02
-	psubw scratch3, %mm3          # v12: v01-v02
+	paddw MUNG(scratch3), %mm1    # v11: v01+v02
+	psubw MUNG(scratch3), %mm3    # v12: v01-v02
 
-	movq  %mm6, scratch4           # scratch4: v05
+	movq  %mm6, MUNG(scratch4)     # scratch4: v05
 	movq  %mm0, %mm6               # duplicate v10
 
 	paddw %mm1, %mm0              # v10+v11
@@ -111,10 +116,10 @@ _dv_dct_88_block_mmx:
 	movq  %mm6, 16*4(%esi)         # out4: v10-v11 
 
 	movq  %mm4, %mm0               # mm0: v06
-	paddw scratch4, %mm4          # v15: v05+v06 
+	paddw MUNG(scratch4), %mm4    # v15: v05+v06 
 	paddw  %mm2, %mm0             # v16: v07+v06
 
-	pmulhw WA3, %mm4               # v35~: WA3*v15
+	pmulhw MUNG(WA3), %mm4         # v35~: WA3*v15
 	psllw  $1, %mm4                # v35: compensate the coeefient scale
 
 	movq   %mm4, %mm6              # duplicate v35
@@ -123,7 +128,7 @@ _dv_dct_88_block_mmx:
 
 	paddw  %mm5, %mm3             # v22: v12+v13
 
-	pmulhw WA1, %mm3               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm3         # v32~: WA1*v22
 	psllw  $16-NSHIFT, %mm3        # v32: compensate the coeefient scale
 	movq   %mm5, %mm6              # duplicate v13
 
@@ -134,13 +139,13 @@ _dv_dct_88_block_mmx:
 	movq  %mm6, 16*6(%esi)         # out6: v13-v32 
 
 
-	paddw  scratch4, %mm7         # v14n: v04+v05
+	paddw  MUNG(scratch4), %mm7   # v14n: v04+v05
 	movq   %mm0, %mm5              # duplicate v16
 
 	psubw  %mm7, %mm0             # va1: v16-v14n
-	pmulhw WA5, %mm0               # va0~:  va1*WA5
-	pmulhw WA4, %mm5               # v36~~: v16*WA4
-	pmulhw WA2, %mm7               # v34~~: v14n*WA2
+	pmulhw MUNG(WA5), %mm0         # va0~:  va1*WA5
+	pmulhw MUNG(WA4), %mm5         # v36~~: v16*WA4
+	pmulhw MUNG(WA2), %mm7         # v34~~: v14n*WA2
 	psllw  $16-WA4_SHIFT, %mm5     # v36: compensate the coeefient scale 
 	psllw  $16-NSHIFT, %mm7        # v34: compensate the coeefient scale
 
@@ -188,22 +193,22 @@ _dv_dct_88_block_mmx:
 
 	movq 16*3(%esi), %mm5              # v3
 	movq 16*4(%esi), %mm7              # v4
-	movq  %mm7, scratch1                    # scratch1: v4   ; 
+	movq  %mm7, MUNG(scratch1)     # scratch1: v4   ; 
 	movq  %mm5, %mm7               # duplicate v3 
-	paddw scratch1, %mm5           # v03: v3+v4  
-	psubw scratch1, %mm7           # v04: v3-v4  
-	movq  %mm5, scratch2        # scratch2: v03
+	paddw MUNG(scratch1), %mm5     # v03: v3+v4  
+	psubw MUNG(scratch1), %mm7     # v04: v3-v4  
+	movq  %mm5, MUNG(scratch2)     # scratch2: v03
 	movq  %mm0, %mm5               # mm5: v00
 
-	paddw scratch2, %mm0           # v10: v00+v03   
-	psubw scratch2, %mm5           # v13: v00-v03   
-	movq  %mm3, scratch3         # scratc3: v02
+	paddw MUNG(scratch2), %mm0     # v10: v00+v03   
+	psubw MUNG(scratch2), %mm5     # v13: v00-v03   
+	movq  %mm3, MUNG(scratch3)     # scratch3: v02
 	movq  %mm1, %mm3               # duplicate v01
 
-	paddw scratch3, %mm1           # v11: v01+v02
-	psubw scratch3, %mm3           # v12: v01-v02
+	paddw MUNG(scratch3), %mm1     # v11: v01+v02
+	psubw MUNG(scratch3), %mm3     # v12: v01-v02
 
-	movq  %mm6, scratch4         # scratc4: v05
+	movq  %mm6, MUNG(scratch4)     # scratch4: v05
 	movq  %mm0, %mm6               # duplicate v10
 
 	paddw %mm1, %mm0                            # v10+v11
@@ -213,10 +218,10 @@ _dv_dct_88_block_mmx:
 	movq  %mm6, 16*4(%esi)          # out4: v10-v11 
 
 	movq  %mm4, %mm0             # mm0: v06
-	paddw scratch4, %mm4         # v15: v05+v06 
+	paddw MUNG(scratch4), %mm4     # v15: v05+v06 
 	paddw  %mm2, %mm0                       # v16: v07+v06
 
-	pmulhw WA3, %mm4           # v35~: WA3*v15
+	pmulhw MUNG(WA3), %mm4         # v35~: WA3*v15
 	psllw  $16-NSHIFT, %mm4       # v35: compensate the coeefient scale
 
 	movq   %mm4, %mm6            # duplicate v35
@@ -225,7 +230,7 @@ _dv_dct_88_block_mmx:
 
 	paddw  %mm5, %mm3            # v22: v12+v13
 
-	pmulhw WA1, %mm3           # v32~: WA3*v15
+	pmulhw MUNG(WA1), %mm3         # v32~: WA1*v22
 	psllw  $16-NSHIFT, %mm3       # v32: compensate the coeefient scale
 	movq   %mm5, %mm6            # duplicate v13
 
@@ -235,13 +240,13 @@ _dv_dct_88_block_mmx:
 	movq  %mm5, 16*2(%esi)          # out2: v13+v32 
 	movq  %mm6, 16*6(%esi)          # out6: v13-v32 
 
-	paddw  scratch4, %mm7                           # v14n: v04+v05
+	paddw  MUNG(scratch4), %mm7     # v14n: v04+v05
 	movq   %mm0, %mm5                               # duplicate v16
 
 	psubw  %mm7, %mm0                               # va1: v16-v14n
-	pmulhw WA2, %mm7                # v34~~: v14n*WA2
-	pmulhw WA5, %mm0                # va0~:  va1*WA5
-	pmulhw WA4, %mm5                        # v36~~: v16*WA4
+	pmulhw MUNG(WA2), %mm7          # v34~~: v14n*WA2
+	pmulhw MUNG(WA5), %mm0          # va0~:  va1*WA5
+	pmulhw MUNG(WA4), %mm5          # v36~~: v16*WA4
 	psllw  $16-NSHIFT, %mm7
 	psllw  $16-WA4_SHIFT, %mm5      # v36: compensate the coeffient 
 		# scale note that WA4 is shifted 1 bit less than the others
@@ -748,11 +755,12 @@ _dv_dct_block_mmx_postscale_88:
 _dv_dct_248_block_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl   %edi
 
-	movl    8(%ebp), %esi          # source
+	LOAD_PIC_REG(bp)
+
+	movl    16(%esp), %esi          # source
 
 # column 0
 
@@ -779,7 +789,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
@@ -818,7 +828,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
@@ -855,7 +865,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
@@ -892,7 +902,7 @@ _dv_dct_248_block_mmx:
 	paddw %mm1, %mm0	       # v20: v10+v11
 	psubw %mm1, %mm3	       # v21: v10-v11
 
-	pmulhw WA1, %mm5               # v32~: WA1*v22
+	pmulhw MUNG(WA1), %mm5         # v32~: WA1*v22
 	movq  %mm4, %mm2	
 	psllw  $16-NSHIFT, %mm5        # v32: compensate the coeffient scale
 
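
The prologue rewrite above is the pattern repeated throughout these files: `movl %esp, %ebp` disappears because %ebp is repurposed as the PIC register, so arguments move from %ebp-relative to %esp-relative addressing. A minimal sketch with invented names -- the offset is 4 bytes for the return address plus 4 per pushed register:

	.text
	.globl	demo_first_arg
	.type	demo_first_arg, @function
demo_first_arg:				/* int demo_first_arg(int x) */
	pushl	%ebp			/* would become the PIC register */
	pushl	%esi
	movl	12(%esp), %eax		/* x: 4 (ret) + 4 (%ebp) + 4 (%esi) */
	popl	%esi
	popl	%ebp
	ret
	.size	demo_first_arg, .-demo_first_arg
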
--- libdv-0.104-old/libdv/dv.c
+++ libdv-0.104/libdv/dv.c
@@ -205,6 +205,9 @@ dv_reconfigure(int clamp_luma, int clamp
 } /* dv_reconfigure */
 
 
+extern uint8_t dv_quant_offset[4];
+extern uint8_t dv_quant_shifts[22][4];
+
 static inline void 
 dv_decode_macroblock(dv_decoder_t *dv, dv_macroblock_t *mb, unsigned int quality) {
   int i;
@@ -218,7 +221,7 @@ dv_decode_macroblock(dv_decoder_t *dv, d
       dv_idct_248 (co248, mb->b[i].coeffs);
     } else {
 #if ARCH_X86
-      _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
+      _dv_quant_88_inverse_x86(mb->b[i].coeffs,mb->qno,mb->b[i].class_no,dv_quant_offset,dv_quant_shifts);
       _dv_idct_88(mb->b[i].coeffs);
 #elif ARCH_X86_64
       _dv_quant_88_inverse_x86_64(mb->b[i].coeffs,mb->qno,mb->b[i].class_no);
@@ -250,7 +253,7 @@ dv_decode_video_segment(dv_decoder_t *dv
 	dv_idct_248 (co248, mb->b[b].coeffs);
       } else {
 #if ARCH_X86
-	_dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no);
+	_dv_quant_88_inverse_x86(bl->coeffs,mb->qno,bl->class_no,dv_quant_offset,dv_quant_shifts);
 	_dv_weight_88_inverse(bl->coeffs);
 	_dv_idct_88(bl->coeffs);
 #elif ARCH_X86_64
--- libdv-0.104-old/libdv/encode.c
+++ libdv-0.104/libdv/encode.c
@@ -521,7 +521,8 @@ static void reorder_block(dv_block_t *bl
 }
 
 extern unsigned long _dv_vlc_encode_block_mmx(dv_coeff_t* coeffs,
-					  dv_vlc_entry_t ** out);
+					  dv_vlc_entry_t ** out,
+					  dv_vlc_entry_t * lookup);
 
 extern unsigned long _dv_vlc_encode_block_mmx_x86_64(dv_coeff_t* coeffs,
 					  dv_vlc_entry_t ** out);
@@ -558,7 +559,7 @@ static unsigned long vlc_encode_block(dv
 #elif ARCH_X86
 	int num_bits;
 
-	num_bits = _dv_vlc_encode_block_mmx(coeffs, &o);
+	num_bits = _dv_vlc_encode_block_mmx(coeffs, &o, vlc_encode_lookup);
 	emms();
 #else
 	int num_bits;
@@ -574,7 +575,7 @@ static unsigned long vlc_encode_block(dv
 	return num_bits;
 }
 
-extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs);
+extern unsigned long _dv_vlc_num_bits_block_x86(dv_coeff_t* coeffs, unsigned char* lookup);
 extern unsigned long _dv_vlc_num_bits_block_x86_64(dv_coeff_t* coeffs);
 
 extern unsigned long _dv_vlc_num_bits_block(dv_coeff_t* coeffs)
@@ -600,7 +601,7 @@ extern unsigned long _dv_vlc_num_bits_bl
 #elif ARCH_X86_64
 	return _dv_vlc_num_bits_block_x86_64(coeffs);
 #else
-	return _dv_vlc_num_bits_block_x86(coeffs);
+	return _dv_vlc_num_bits_block_x86(coeffs, vlc_num_bits_lookup);
 #endif
 }
 
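Instead of letting the assembly name vlc_encode_lookup and vlc_num_bits_lookup directly (which would need relocations), the C callers now pass each table's address as a trailing argument; offsets such as 4+4*4+8(%esp) in the .S file count the return address plus the registers pushed so far. A sketch of the receiving side, names invented:

	.text
	.globl	demo_table_arg
	.type	demo_table_arg, @function
demo_table_arg:			/* int demo_table_arg(int i, const int *table) */
	movl	8(%esp), %edx	/* table pointer supplied by the C caller */
	movl	4(%esp), %ecx	/* index i */
	movl	(%edx,%ecx,4), %eax	/* table[i]: no symbol reference at all */
	ret
	.size	demo_table_arg, .-demo_table_arg
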
--- libdv-0.104-old/libdv/encode_x86.S
+++ libdv-0.104/libdv/encode_x86.S
@@ -23,9 +23,6 @@
  *  The libdv homepage is http://libdv.sourceforge.net/.  
  */
 
-.data
-ALLONE:		.word 1,1,1,1
-VLCADDMASK:	.byte 255,0,0,0,255,0,0,0
 		
 
 .section .note.GNU-stack, "", @progbits
@@ -45,11 +43,14 @@ _dv_vlc_encode_block_mmx:	
 
 	movl	$63, %ecx
 
-	movl	vlc_encode_lookup, %esi
+	movl	4+4*4+8(%esp), %esi              # vlc_encode_lookup
 
 	pxor	%mm0, %mm0
 	pxor	%mm2, %mm2
-	movq	VLCADDMASK, %mm1
+	pushl	$0x000000FF                      # these four lines
+	pushl	$0x000000FF                      # load VLCADDMASK
+	movq	(%esp), %mm1                     # into %mm1 off the stack
+	addl	$8, %esp                         #  --> no TEXTRELs
 	xorl	%ebp, %ebp
 	subl	$8, %edx
 vlc_encode_block_mmx_loop:
@@ -121,7 +124,7 @@ _dv_vlc_num_bits_block_x86:	
 	addl	$2, %edi
 
 	movl	$63, %ecx
-	movl	vlc_num_bits_lookup, %esi
+	movl	4+4*4+4(%esp), %esi              # vlc_num_bits_lookup
 	
 vlc_num_bits_block_x86_loop:
 	movw	(%edi), %ax
@@ -579,8 +590,11 @@ _dv_need_dct_248_mmx_rows:
 	paddw	%mm5, %mm1
 
 	paddw	%mm1, %mm0
-	
-	pmaddwd	ALLONE, %mm0	
+
+	pushl	$0x00010001              # these four lines
+	pushl	$0x00010001              # load ALLONE
+	pmaddwd	(%esp), %mm0             # into %mm0 off the stack
+	addl	$8, %esp                 #  --> no TEXTRELs
 	movq	%mm0, %mm1
 	psrlq	$32, %mm1
 	paddd	%mm1, %mm0
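
The VLCADDMASK and ALLONE rewrites above use a different trick for small literal constants: build them on the stack from immediates instead of loading them from .data, which avoids a relocation entirely and works even where no PIC register has been set up. A standalone sketch (demo_allone is an invented name):

	.text
	.globl	demo_allone
	.type	demo_allone, @function
demo_allone:
	pushl	$0x00010001	/* high dword of .word 1,1,1,1 */
	pushl	$0x00010001	/* low dword */
	movq	(%esp), %mm0	/* %mm0 = 0x0001000100010001 */
	addl	$8, %esp	/* discard the temporary */
	emms
	ret
	.size	demo_allone, .-demo_allone
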
--- libdv-0.104-old/libdv/idct_block_mmx.S
+++ libdv-0.104/libdv/idct_block_mmx.S
@@ -8,17 +8,21 @@
 
 .section .note.GNU-stack, "", @progbits
 
+#include "asm_common.S"
+
 .text
 	.align 4
 .global _dv_idct_block_mmx
 .hidden _dv_idct_block_mmx
 .type   _dv_idct_block_mmx,@function
 _dv_idct_block_mmx:
 	pushl	 %ebp
-	movl	 %esp,%ebp
 	pushl	 %esi
-	leal	 preSC, %ecx
-	movl	 8(%ebp),%esi		/* source matrix */
+
+	LOAD_PIC_REG(bp)
+
+	leal	 MUNG(preSC), %ecx
+	movl	 12(%esp),%esi		/* source matrix */
 
 /* 
  *	column 0: even part
@@ -35,7 +41,7 @@ _dv_idct_block_mmx:
 	movq %mm1, %mm2			/* added 11/1/96 */
 	pmulhw 8*8(%esi),%mm5		/* V8 */
 	psubsw %mm0, %mm1		/* V16 */
-	pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V18 */
+	pmulhw MUNG(x5a825a825a825a82), %mm1	/* 23170 ->V18 */
 	paddsw %mm0, %mm2		/* V17 */
 	movq %mm2, %mm0			/* duplicate V17 */
 	psraw $1, %mm2			/* t75=t82 */
@@ -76,7 +82,7 @@ _dv_idct_block_mmx:
 	paddsw %mm0, %mm3		/* V29 ; free mm0 */
 	movq %mm7, %mm1			/* duplicate V26 */
 	psraw $1, %mm3			/* t91=t94 */
-	pmulhw x539f539f539f539f,%mm7	/* V33 */
+	pmulhw MUNG(x539f539f539f539f),%mm7	/* V33 */
 	psraw $1, %mm1			/* t96 */
 	movq %mm5, %mm0			/* duplicate V2 */
 	psraw $2, %mm4			/* t85=t87 */
@@ -84,15 +90,15 @@ _dv_idct_block_mmx:
 	psubsw %mm4, %mm0		/* V28 ; free mm4 */
 	movq %mm0, %mm2			/* duplicate V28 */
 	psraw $1, %mm5			/* t90=t93 */
-	pmulhw x4546454645464546,%mm0	/* V35 */
+	pmulhw MUNG(x4546454645464546),%mm0	/* V35 */
 	psraw $1, %mm2			/* t97 */
 	movq %mm5, %mm4			/* duplicate t90=t93 */
 	psubsw %mm2, %mm1		/* V32 ; free mm2 */
-	pmulhw x61f861f861f861f8,%mm1	/* V36 */
+	pmulhw MUNG(x61f861f861f861f8),%mm1	/* V36 */
 	psllw $1, %mm7			/* t107 */
 	paddsw %mm3, %mm5		/* V31 */
 	psubsw %mm3, %mm4		/* V30 ; free mm3 */
-	pmulhw x5a825a825a825a82,%mm4	/* V34 */
+	pmulhw MUNG(x5a825a825a825a82),%mm4	/* V34 */
 	nop
 	psubsw %mm1, %mm0		/* V38 */
 	psubsw %mm7, %mm1		/* V37 ; free mm7 */
@@ -159,7 +165,7 @@ _dv_idct_block_mmx:
 	psubsw %mm7, %mm1		/* V50 */
 	pmulhw 8*9(%esi), %mm5		/* V9 */
 	paddsw %mm7, %mm2		/* V51 */
-	pmulhw x5a825a825a825a82, %mm1	/* 23170 ->V52 */
+	pmulhw MUNG(x5a825a825a825a82), %mm1	/* 23170 ->V52 */
 	movq %mm2, %mm6			/* duplicate V51 */
 	psraw $1, %mm2			/* t138=t144 */
 	movq %mm3, %mm4			/* duplicate V1 */
@@ -200,11 +206,11 @@ _dv_idct_block_mmx:
  * even more by doing the correction step in a later stage when the number
  * is actually multiplied by 16
  */
-	paddw x0005000200010001, %mm4
+	paddw MUNG(x0005000200010001), %mm4
 	psubsw %mm6, %mm3		/* V60 ; free mm6 */
 	psraw $1, %mm0			/* t154=t156 */
 	movq %mm3, %mm1			/* duplicate V60 */
-	pmulhw x539f539f539f539f, %mm1	/* V67 */
+	pmulhw MUNG(x539f539f539f539f), %mm1	/* V67 */
 	movq %mm5, %mm6			/* duplicate V3 */
 	psraw $2, %mm4			/* t148=t150 */
 	paddsw %mm4, %mm5		/* V61 */
@@ -213,13 +219,13 @@ _dv_idct_block_mmx:
 	psllw $1, %mm1			/* t169 */
 	paddsw %mm0, %mm5		/* V65 -> result */
 	psubsw %mm0, %mm4		/* V64 ; free mm0 */
-	pmulhw x5a825a825a825a82, %mm4	/* V68 */
+	pmulhw MUNG(x5a825a825a825a82), %mm4	/* V68 */
 	psraw $1, %mm3			/* t158 */
 	psubsw %mm6, %mm3		/* V66 */
 	movq %mm5, %mm2			/* duplicate V65 */
-	pmulhw x61f861f861f861f8, %mm3	/* V70 */
+	pmulhw MUNG(x61f861f861f861f8), %mm3	/* V70 */
 	psllw $1, %mm6			/* t165 */
-	pmulhw x4546454645464546, %mm6	/* V69 */
+	pmulhw MUNG(x4546454645464546), %mm6	/* V69 */
 	psraw $1, %mm2			/* t172 */
 /* moved from next block */
 	movq 8*5(%esi), %mm0		/* V56 */
@@ -344,7 +350,7 @@ _dv_idct_block_mmx:
 *	movq 8*13(%esi), %mm4		tmt13
 */
 	psubsw %mm4, %mm3		/* V134 */
-	pmulhw x5a825a825a825a82, %mm3	/* 23170 ->V136 */
+	pmulhw MUNG(x5a825a825a825a82), %mm3	/* 23170 ->V136 */
 	movq 8*9(%esi), %mm6		/* tmt9 */
 	paddsw %mm4, %mm5		/* V135 ; mm4 free */
 	movq %mm0, %mm4			/* duplicate tmt1 */
@@ -373,17 +379,17 @@ _dv_idct_block_mmx:
 	psubsw %mm7, %mm0		/* V144 */
 	movq %mm0, %mm3			/* duplicate V144 */
 	paddsw %mm7, %mm2		/* V147 ; free mm7 */
-	pmulhw x539f539f539f539f, %mm0	/* 21407-> V151 */
+	pmulhw MUNG(x539f539f539f539f), %mm0	/* 21407-> V151 */
 	movq %mm1, %mm7			/* duplicate tmt3 */
 	paddsw %mm5, %mm7		/* V145 */
 	psubsw %mm5, %mm1		/* V146 ; free mm5 */
 	psubsw %mm1, %mm3		/* V150 */
 	movq %mm7, %mm5			/* duplicate V145 */
-	pmulhw x4546454645464546, %mm1	/* 17734-> V153 */
+	pmulhw MUNG(x4546454645464546), %mm1	/* 17734-> V153 */
 	psubsw %mm2, %mm5		/* V148 */
-	pmulhw x61f861f861f861f8, %mm3	/* 25080-> V154 */
+	pmulhw MUNG(x61f861f861f861f8), %mm3	/* 25080-> V154 */
 	psllw $2, %mm0			/* t311 */
-	pmulhw x5a825a825a825a82, %mm5	/* 23170-> V152 */
+	pmulhw MUNG(x5a825a825a825a82), %mm5	/* 23170-> V152 */
 	paddsw %mm2, %mm7		/* V149 ; free mm2 */
 	psllw $1, %mm1			/* t313 */
 	nop	/* without the nop - freeze here for one clock */
@@ -409,7 +415,7 @@ _dv_idct_block_mmx:
 	paddsw %mm3, %mm6		/* V164 ; free mm3 */
 	movq %mm4, %mm3			/* duplicate V142 */
 	psubsw %mm5, %mm4		/* V165 ; free mm5 */
-	movq %mm2, scratch7		/* out7 */
+	movq %mm2, MUNG(scratch7)		/* out7 */
 	psraw $4, %mm6
 	psraw $4, %mm4
 	paddsw %mm5, %mm3		/* V162 */
@@ -420,11 +426,11 @@ _dv_idct_block_mmx:
  */
 	movq %mm6, 8*9(%esi)		/* out9 */
 	paddsw %mm1, %mm0		/* V161 */
-	movq %mm3, scratch5		/* out5 */
+	movq %mm3, MUNG(scratch5)		/* out5 */
 	psubsw %mm1, %mm5		/* V166 ; free mm1 */
 	movq %mm4, 8*11(%esi)		/* out11 */
 	psraw $4, %mm5
-	movq %mm0, scratch3		/* out3 */
+	movq %mm0, MUNG(scratch3)		/* out3 */
 	movq %mm2, %mm4			/* duplicate V140 */
 	movq %mm5, 8*13(%esi)		/* out13 */
 	paddsw %mm7, %mm2		/* V160 */
@@ -434,7 +440,7 @@ _dv_idct_block_mmx:
 /* moved from the next block */
 	movq 8*3(%esi), %mm7
 	psraw $4, %mm4
-	movq %mm2, scratch1		/* out1 */
+	movq %mm2, MUNG(scratch1)		/* out1 */
 /* moved from the next block */
 	movq %mm0, %mm1
 	movq %mm4, 8*15(%esi)		/* out15 */
@@ -491,15 +497,15 @@ _dv_idct_block_mmx:
 	paddsw %mm4, %mm3		/* V113 ; free mm4 */
 	movq %mm0, %mm4			/* duplicate V110 */
 	paddsw %mm1, %mm2		/* V111 */
-	pmulhw x539f539f539f539f, %mm0	/* 21407-> V117 */
+	pmulhw MUNG(x539f539f539f539f), %mm0	/* 21407-> V117 */
 	psubsw %mm1, %mm5		/* V112 ; free mm1 */
 	psubsw %mm5, %mm4		/* V116 */
 	movq %mm2, %mm1			/* duplicate V111 */
-	pmulhw x4546454645464546, %mm5	/* 17734-> V119 */
+	pmulhw MUNG(x4546454645464546), %mm5	/* 17734-> V119 */
 	psubsw %mm3, %mm2		/* V114 */
-	pmulhw x61f861f861f861f8, %mm4	/* 25080-> V120 */
+	pmulhw MUNG(x61f861f861f861f8), %mm4	/* 25080-> V120 */
 	paddsw %mm3, %mm1		/* V115 ; free mm3 */
-	pmulhw x5a825a825a825a82, %mm2	/* 23170-> V118 */
+	pmulhw MUNG(x5a825a825a825a82), %mm2	/* 23170-> V118 */
 	psllw $2, %mm0			/* t266 */
 	movq %mm1, (%esi)		/* save V115 */
 	psllw $1, %mm5			/* t268 */
@@ -517,7 +523,7 @@ _dv_idct_block_mmx:
 	movq %mm6, %mm3			/* duplicate tmt4 */
 	psubsw %mm0, %mm6		/* V100 */
 	paddsw %mm0, %mm3		/* V101 ; free mm0 */
-	pmulhw x5a825a825a825a82, %mm6	/* 23170 ->V102 */
+	pmulhw MUNG(x5a825a825a825a82), %mm6	/* 23170 ->V102 */
 	movq %mm7, %mm5			/* duplicate tmt0 */
 	movq 8*8(%esi), %mm1		/* tmt8 */
 	paddsw %mm1, %mm7		/* V103 */
@@ -551,10 +557,10 @@ _dv_idct_block_mmx:
 	movq 8*2(%esi), %mm3		/* V123 */
 	paddsw %mm4, %mm7		/* out0 */
 /* moved up from next block */
-	movq scratch3, %mm0
+	movq MUNG(scratch3), %mm0
 	psraw $4, %mm7
 /* moved up from next block */
-	movq scratch5, %mm6 
+	movq MUNG(scratch5), %mm6 
 	psubsw %mm4, %mm1		/* out14 ; free mm4 */
 	paddsw %mm3, %mm5		/* out2 */
 	psraw $4, %mm1
@@ -565,7 +571,7 @@ _dv_idct_block_mmx:
 	movq %mm5, 8*2(%esi)		/* out2 ; free mm5 */
 	psraw $4, %mm2
 /* moved up to the prev block */
-	movq scratch7, %mm4
+	movq MUNG(scratch7), %mm4
 /* moved up to the prev block */
 	psraw $4, %mm0
 	movq %mm2, 8*12(%esi)		/* out12 ; free mm2 */
@@ -579,7 +585,7 @@ _dv_idct_block_mmx:
  *	psraw $4, %mm0
  *	psraw $4, %mm6
 */
-	movq scratch1, %mm1
+	movq MUNG(scratch1), %mm1
 	psraw $4, %mm4
 	movq %mm0, 8*3(%esi)		/* out3 */
 	psraw $4, %mm1
--- libdv-0.104-old/libdv/parse.c
+++ libdv-0.104/libdv/parse.c
@@ -477,6 +477,13 @@ dv_parse_ac_coeffs(dv_videosegment_t *se
   exit(0);
 #endif
 } /* dv_parse_ac_coeffs */
+#if defined __GNUC__ && __ELF__
+# define dv_strong_hidden_alias(name, aliasname) \
+    extern __typeof (name) aliasname __attribute__ ((alias (#name), visibility ("hidden")))
+dv_strong_hidden_alias(dv_parse_ac_coeffs, asm_dv_parse_ac_coeffs);
+#else
+int asm_dv_parse_ac_coeffs(dv_videosegment_t *seg) { return dv_parse_ac_coeffs(seg); }
+#endif
 
 /* ---------------------------------------------------------------------------
  */
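
dv_parse_ac_coeffs stays exported unchanged, while the assembly calls the hidden alias, which binds locally and therefore needs no PLT entry or GOT-relative call. vlc_x86.S below applies the same idea directly with `asm_dv_decode_vlc = dv_decode_vlc`. A sketch of the linkage effect, names invented:

	.text
	.globl	my_func			/* exported: external calls may go via the PLT */
	.type	my_func, @function
my_func:
	ret
	.size	my_func, .-my_func

	.globl	asm_my_func
	.hidden	asm_my_func		/* hidden: always binds within this DSO */
asm_my_func = my_func			/* `call asm_my_func` needs no PLT/GOT */
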
--- libdv-0.104-old/libdv/quant.c
+++ libdv-0.104/libdv/quant.c
@@ -144,7 +144,7 @@ uint8_t  dv_quant_offset[4] = { 6,3,0,1 
 uint32_t	dv_quant_248_mul_tab [2] [22] [64];
 uint32_t dv_quant_88_mul_tab [2] [22] [64];
 
-extern void             _dv_quant_x86(dv_coeff_t *block,int qno,int klass);
+extern void             _dv_quant_x86(dv_coeff_t *block,int qno,int klass,uint8_t *dv_quant_offset,uint8_t *dv_quant_shifts);
 extern void             _dv_quant_x86_64(dv_coeff_t *block,int qno,int klass);
 static void quant_248_inverse_std(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
 static void quant_248_inverse_mmx(dv_coeff_t *block,int qno,int klass,dv_248_coeff_t *co);
@@ -210,7 +210,7 @@ void _dv_quant(dv_coeff_t *block,int qno
 		_dv_quant_x86_64(block, qno, klass);
 		emms();
 #else
-		_dv_quant_x86(block, qno, klass);
+		_dv_quant_x86(block, qno, klass, dv_quant_offset, dv_quant_shifts);
 		emms();
 #endif
 	}
--- libdv-0.104-old/libdv/quant.h
+++ libdv-0.104/libdv/quant.h
@@ -27,7 +27,7 @@ extern void _dv_quant(dv_coeff_t *block,
 extern void _dv_quant_88_inverse(dv_coeff_t *block,int qno,int klass);
 extern void (*_dv_quant_248_inverse) (dv_coeff_t *block,int qno,int klass,
                                   dv_248_coeff_t *co);
-extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass);
+extern void _dv_quant_88_inverse_x86(dv_coeff_t *block,int qno,int klass, uint8_t *offset, uint8_t *shifts);
 extern void _dv_quant_88_inverse_x86_64(dv_coeff_t *block,int qno,int klass);
 extern void dv_quant_init (void);
 #ifdef __cplusplus
--- libdv-0.104-old/libdv/quant_x86.S
+++ libdv-0.104/libdv/quant_x86.S
@@ -71,10 +73,13 @@ _dv_quant_88_inverse_x86:	
 	
 	/*  pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
 	movl	ARGn(1),%eax	/* qno */
+	movl	ARGn(3),%ebx	/* dv_quant_offset */
+	addl	ARGn(2),%ebx	/* class */
+	movzbl	(%ebx),%ecx
 	movl	ARGn(2),%ebx	/* class */
-	movzbl	dv_quant_offset(%ebx),%ecx
 	addl	%ecx,%eax
-	leal	dv_quant_shifts(,%eax,4),%edx	/* edx is pq */
+	movl	ARGn(4),%edx	/* dv_quant_shifts */
+	leal	(%edx,%eax,4),%edx	/* edx is pq */
 
 	/* extra = (class == 3); */
 				/*  0   1   2   3 */
@@ -212,11 +219,13 @@ _dv_quant_x86:	
 	
 	/*  pq = dv_quant_shifts[qno + dv_quant_offset[class]]; */
 	movl	ARGn(1),%eax	/* qno */
+	movl	ARGn(3),%ebx	/* offset */
+	addl	ARGn(2),%ebx	/* class */
+	movzbl	(%ebx),%ecx
 	movl	ARGn(2),%ebx	/* class */
-
-	movzbl	dv_quant_offset(%ebx),%ecx
+	movl	ARGn(4),%edx	/* shifts */
 	addl	%ecx,%eax
-	leal	dv_quant_shifts(,%eax,4),%edx	/* edx is pq */
+	leal	(%edx,%eax,4),%edx	/* edx is pq */
 
 	/* extra = (class == 3); */
 				/*  0   1   2   3 */
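
Both routines here follow the same recipe as encode.c/encode_x86.S: the byte-wide dv_quant_offset and dword-wide dv_quant_shifts tables arrive as pointer arguments, and indexing that used to embed the symbol (`movzbl dv_quant_offset(%ebx),%ecx`) becomes pointer arithmetic on the argument. A sketch with invented names:

	.text
	.globl	demo_offset_lookup
	.type	demo_offset_lookup, @function
demo_offset_lookup:		/* int demo_offset_lookup(int class, const uint8_t *offsets) */
	movl	8(%esp), %edx	/* offsets table, passed by the caller */
	addl	4(%esp), %edx	/* &offsets[class] */
	movzbl	(%edx), %eax	/* offsets[class], zero-extended */
	ret
	.size	demo_offset_lookup, .-demo_offset_lookup
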
--- libdv-0.104-old/libdv/rgbtoyuv.S
+++ libdv-0.104/libdv/rgbtoyuv.S
@@ -41,9 +41,6 @@
 #define DV_WIDTH_SHORT_HALF 720
 #define DV_WIDTH_BYTE_HALF  360	
 		
-.global _dv_rgbtoycb_mmx
-# .global yuvtoycb_mmx
-
 .data
 
 .align 8
@@ -110,25 +107,26 @@ VR0GR:  .long   0,0
 VBG0B:  .long   0,0
 	
 #endif	
-	
+
+#include "asm_common.S"
+
 .section .note.GNU-stack, "", @progbits
 
 .text
 
-#define _inPtr     8
-#define _rows      12
-#define _columns   16
-#define _outyPtr   20
-#define _outuPtr   24
-#define _outvPtr   28
+#define _inPtr     24+8
+#define _rows      24+12
+#define _columns   24+16
+#define _outyPtr   24+20
+#define _outuPtr   24+24
+#define _outvPtr   24+28
 
 .global _dv_rgbtoycb_mmx
 .hidden _dv_rgbtoycb_mmx
 .type   _dv_rgbtoycb_mmx,@function
 _dv_rgbtoycb_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %eax
 	pushl   %ebx
 	pushl   %ecx
@@ -131,46 +132,47 @@ _dv_rgbtoycb_mmx:
 	pushl   %esi
 	pushl   %edi
 
-	leal    ZEROSX, %eax    #This section gets around a bug
+	LOAD_PIC_REG(bp)
+
+	leal    MUNG(ZEROSX), %eax    #This section gets around a bug
 	movq    (%eax), %mm0    #unlikely to persist
-	movq    %mm0, ZEROS
-	leal    OFFSETDX, %eax
+	movq    %mm0, MUNG(ZEROS)
+	leal    MUNG(OFFSETDX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, OFFSETD
-	leal    OFFSETWX, %eax
+	movq    %mm0, MUNG(OFFSETD)
+	leal    MUNG(OFFSETWX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, OFFSETW
-	leal    OFFSETBX, %eax
+	movq    %mm0, MUNG(OFFSETW)
+	leal    MUNG(OFFSETBX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, OFFSETB
-	leal    YR0GRX, %eax
+	movq    %mm0, MUNG(OFFSETB)
+	leal    MUNG(YR0GRX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, YR0GR
-	leal    YBG0BX, %eax
+	movq    %mm0, MUNG(YR0GR)
+	leal    MUNG(YBG0BX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, YBG0B
-	leal    UR0GRX, %eax
+	movq    %mm0, MUNG(YBG0B)
+	leal    MUNG(UR0GRX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, UR0GR
-	leal    UBG0BX, %eax
+	movq    %mm0, MUNG(UR0GR)
+	leal    MUNG(UBG0BX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, UBG0B
-	leal    VR0GRX, %eax
+	movq    %mm0, MUNG(UBG0B)
+	leal    MUNG(VR0GRX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, VR0GR
-	leal    VBG0BX, %eax
+	movq    %mm0, MUNG(VR0GR)
+	leal    MUNG(VBG0BX), %eax
 	movq    (%eax), %mm0
-	movq    %mm0, VBG0B
-	
-	movl    _rows(%ebp), %eax
-	movl    _columns(%ebp), %ebx
+	movq    %mm0, MUNG(VBG0B)
+	movl    _rows(%esp), %eax
+	movl    _columns(%esp), %ebx
 	mull    %ebx            #number pixels
 	shrl    $3, %eax        #number of loops
 	movl    %eax, %edi      #loop counter in edi
-	movl    _inPtr(%ebp), %eax
-	movl    _outyPtr(%ebp), %ebx
-	movl    _outuPtr(%ebp), %ecx
-	movl    _outvPtr(%ebp), %edx
+	movl    _inPtr(%esp), %eax
+	movl    _outyPtr(%esp), %ebx
+	movl    _outuPtr(%esp), %ecx
+	movl    _outvPtr(%esp), %edx
 rgbtoycb_mmx_loop: 
 	movq    (%eax), %mm1    #load G2R2B1G1R1B0G0R0
 	pxor    %mm6, %mm6      #0 -> mm6
@@ -184,29 +186,29 @@ rgbtoycb_mmx_loop: 
 	punpcklbw %mm6, %mm1     #B1G1R1B0 -> mm1
 	movq    %mm0, %mm2      #R1B0G0R0 -> mm2
 
-	pmaddwd YR0GR, %mm0     #yrR1,ygG0+yrR0 -> mm0
+	pmaddwd MUNG(YR0GR), %mm0     #yrR1,ygG0+yrR0 -> mm0
 	movq    %mm1, %mm3      #B1G1R1B0 -> mm3
 
-	pmaddwd YBG0B, %mm1     #ybB1+ygG1,ybB0 -> mm1
+	pmaddwd MUNG(YBG0B), %mm1     #ybB1+ygG1,ybB0 -> mm1
 	movq    %mm2, %mm4      #R1B0G0R0 -> mm4
 
-	pmaddwd UR0GR, %mm2     #urR1,ugG0+urR0 -> mm2
+	pmaddwd MUNG(UR0GR), %mm2     #urR1,ugG0+urR0 -> mm2
 	movq    %mm3, %mm5      #B1G1R1B0 -> mm5
 
-	pmaddwd UBG0B, %mm3     #ubB1+ugG1,ubB0 -> mm3
+	pmaddwd MUNG(UBG0B), %mm3     #ubB1+ugG1,ubB0 -> mm3
 	punpckhbw       %mm6, %mm7 #    00G2R2 -> mm7
 
-	pmaddwd VR0GR, %mm4     #vrR1,vgG0+vrR0 -> mm4
+	pmaddwd MUNG(VR0GR), %mm4     #vrR1,vgG0+vrR0 -> mm4
 	paddd   %mm1, %mm0      #Y1Y0 -> mm0
 
-	pmaddwd VBG0B, %mm5     #vbB1+vgG1,vbB0 -> mm5
+	pmaddwd MUNG(VBG0B), %mm5     #vbB1+vgG1,vbB0 -> mm5
 
 	movq    8(%eax), %mm1   #R5B4G4R4B3G3R3B2 -> mm1
 	paddd   %mm3, %mm2      #U1U0 -> mm2
 
 	movq    %mm1, %mm6      #R5B4G4R4B3G3R3B2 -> mm6
 
-	punpcklbw       ZEROS, %mm1     #B3G3R3B2 -> mm1
+	punpcklbw       MUNG(ZEROS), %mm1     #B3G3R3B2 -> mm1
 	paddd   %mm5, %mm4      #V1V0 -> mm4
 
 	movq    %mm1, %mm5      #B3G3R3B2 -> mm5
@@ -214,29 +216,29 @@ rgbtoycb_mmx_loop: 
 
 	paddd   %mm7, %mm1      #R3B200+00G2R2=R3B2G2R2->mm1
 
-	punpckhbw       ZEROS, %mm6     #R5B4G4R3 -> mm6
+	punpckhbw       MUNG(ZEROS), %mm6     #R5B4G4R3 -> mm6
 	movq    %mm1, %mm3      #R3B2G2R2 -> mm3
 
-	pmaddwd YR0GR, %mm1     #yrR3,ygG2+yrR2 -> mm1
+	pmaddwd MUNG(YR0GR), %mm1     #yrR3,ygG2+yrR2 -> mm1
 	movq    %mm5, %mm7      #B3G3R3B2 -> mm7
 
-	pmaddwd YBG0B, %mm5     #ybB3+ygG3,ybB2 -> mm5
+	pmaddwd MUNG(YBG0B), %mm5     #ybB3+ygG3,ybB2 -> mm5
 	psrad   $FIXPSHIFT, %mm0       #32-bit scaled Y1Y0 -> mm0
 
-	movq    %mm6, TEMP0     #R5B4G4R4 -> TEMP0
+	movq    %mm6, MUNG(TEMP0)     #R5B4G4R4 -> TEMP0
 	movq    %mm3, %mm6      #R3B2G2R2 -> mm6
-	pmaddwd UR0GR, %mm6     #urR3,ugG2+urR2 -> mm6
+	pmaddwd MUNG(UR0GR), %mm6     #urR3,ugG2+urR2 -> mm6
 	psrad   $FIXPSHIFT, %mm2       #32-bit scaled U1U0 -> mm2
 
 	paddd   %mm5, %mm1      #Y3Y2 -> mm1
 	movq    %mm7, %mm5      #B3G3R3B2 -> mm5
-	pmaddwd UBG0B, %mm7     #ubB3+ugG3,ubB2
+	pmaddwd MUNG(UBG0B), %mm7     #ubB3+ugG3,ubB2
 	psrad   $FIXPSHIFT, %mm1 #32-bit scaled Y3Y2 -> mm1
 
-	pmaddwd VR0GR, %mm3     #vrR3,vgG2+vgR2
+	pmaddwd MUNG(VR0GR), %mm3     #vrR3,vgG2+vgR2
 	packssdw        %mm1, %mm0      #Y3Y2Y1Y0 -> mm0
 
-	pmaddwd VBG0B, %mm5     #vbB3+vgG3,vbB2 -> mm5
+	pmaddwd MUNG(VBG0B), %mm5     #vbB3+vgG3,vbB2 -> mm5
 	psrad   $FIXPSHIFT, %mm4       #32-bit scaled V1V0 -> mm4
 
 	movq    16(%eax), %mm1  #B7G7R7B6G6R6B5G5 -> mm7
@@ -251,58 +253,58 @@ rgbtoycb_mmx_loop: 
 	movq    %mm7, %mm5      #R7B6G6R6B5G500 -> mm5
 	psrad   $FIXPSHIFT, %mm3       #32-bit scaled V3V2 -> mm3
 
-	paddw	OFFSETY, %mm0
+	paddw	MUNG(OFFSETY), %mm0
 	movq    %mm0, (%ebx)     #store Y3Y2Y1Y0 
 	packssdw %mm6, %mm2      #32-bit scaled U3U2U1U0 -> mm2
 
-	movq    TEMP0, %mm0     #R5B4G4R4 -> mm0
+	movq    MUNG(TEMP0), %mm0     #R5B4G4R4 -> mm0
 	addl	$8, %ebx
-	
-	punpcklbw       ZEROS, %mm7     #B5G500 -> mm7
+
+	punpcklbw       MUNG(ZEROS), %mm7     #B5G500 -> mm7
 	movq    %mm0, %mm6      #R5B4G4R4 -> mm6
 
-	movq    %mm2, TEMPU     #32-bit scaled U3U2U1U0 -> TEMPU
+	movq    %mm2, MUNG(TEMPU)     #32-bit scaled U3U2U1U0 -> TEMPU
 	psrlq   $32, %mm0       #00R5B4 -> mm0
 
 	paddw   %mm0, %mm7      #B5G5R5B4 -> mm7
 	movq    %mm6, %mm2      #B5B4G4R4 -> mm2
 
-	pmaddwd YR0GR, %mm2     #yrR5,ygG4+yrR4 -> mm2
+	pmaddwd MUNG(YR0GR), %mm2     #yrR5,ygG4+yrR4 -> mm2
 	movq    %mm7, %mm0      #B5G5R5B4 -> mm0
 
-	pmaddwd YBG0B, %mm7     #ybB5+ygG5,ybB4 -> mm7
+	pmaddwd MUNG(YBG0B), %mm7     #ybB5+ygG5,ybB4 -> mm7
 	packssdw        %mm3, %mm4      #32-bit scaled V3V2V1V0 -> mm4
 
 	addl    $24, %eax       #increment RGB count
 
-	movq    %mm4, TEMPV     #(V3V2V1V0)/256 -> mm4
+	movq    %mm4, MUNG(TEMPV)     #(V3V2V1V0)/256 -> mm4
 	movq    %mm6, %mm4      #B5B4G4R4 -> mm4
 
-	pmaddwd UR0GR, %mm6     #urR5,ugG4+urR4
+	pmaddwd MUNG(UR0GR), %mm6     #urR5,ugG4+urR4
 	movq    %mm0, %mm3      #B5G5R5B4 -> mm0
 
-	pmaddwd UBG0B, %mm0     #ubB5+ugG5,ubB4
+	pmaddwd MUNG(UBG0B), %mm0     #ubB5+ugG5,ubB4
 	paddd   %mm7, %mm2      #Y5Y4 -> mm2
 
-	pmaddwd         VR0GR, %mm4     #vrR5,vgG4+vrR4 -> mm4
+	pmaddwd         MUNG(VR0GR), %mm4     #vrR5,vgG4+vrR4 -> mm4
 	pxor    %mm7, %mm7      #0 -> mm7
 
-	pmaddwd VBG0B, %mm3     #vbB5+vgG5,vbB4 -> mm3
+	pmaddwd MUNG(VBG0B), %mm3     #vbB5+vgG5,vbB4 -> mm3
 	punpckhbw       %mm7, %mm1      #B7G7R7B6 -> mm1
 
 	paddd   %mm6, %mm0      #U5U4 -> mm0
 	movq    %mm1, %mm6      #B7G7R7B6 -> mm6
 
-	pmaddwd YBG0B, %mm6     #ybB7+ygG7,ybB6 -> mm6
+	pmaddwd MUNG(YBG0B), %mm6     #ybB7+ygG7,ybB6 -> mm6
 	punpckhbw       %mm7, %mm5      #R7B6G6R6 -> mm5
 
 	movq    %mm5, %mm7      #R7B6G6R6 -> mm7
 	paddd   %mm4, %mm3      #V5V4 -> mm3
 
-	pmaddwd YR0GR, %mm5     #yrR7,ygG6+yrR6 -> mm5
+	pmaddwd MUNG(YR0GR), %mm5     #yrR7,ygG6+yrR6 -> mm5
 	movq    %mm1, %mm4      #B7G7R7B6 -> mm4
 
-	pmaddwd UBG0B, %mm4     #ubB7+ugG7,ubB6 -> mm4
+	pmaddwd MUNG(UBG0B), %mm4     #ubB7+ugG7,ubB6 -> mm4
 	psrad   $FIXPSHIFT, %mm0       #32-bit scaled U5U4 -> mm0
 
 	psrad   $FIXPSHIFT, %mm2       #32-bit scaled Y5Y4 -> mm2
@@ -310,25 +312,25 @@ rgbtoycb_mmx_loop: 
 	paddd   %mm5, %mm6      #Y7Y6 -> mm6
 	movq    %mm7, %mm5      #R7B6G6R6 -> mm5
 
-	pmaddwd UR0GR, %mm7     #urR7,ugG6+ugR6 -> mm7
+	pmaddwd MUNG(UR0GR), %mm7     #urR7,ugG6+ugR6 -> mm7
 	psrad   $FIXPSHIFT, %mm3       #32-bit scaled V5V4 -> mm3
 
-	pmaddwd VBG0B, %mm1     #vbB7+vgG7,vbB6 -> mm1
+	pmaddwd MUNG(VBG0B), %mm1     #vbB7+vgG7,vbB6 -> mm1
 	psrad   $FIXPSHIFT, %mm6 #32-bit scaled Y7Y6 -> mm6
 
 	packssdw %mm6, %mm2     #Y7Y6Y5Y4 -> mm2
 
-	pmaddwd VR0GR, %mm5     #vrR7,vgG6+vrR6 -> mm5
+	pmaddwd MUNG(VR0GR), %mm5     #vrR7,vgG6+vrR6 -> mm5
 	paddd   %mm4, %mm7      #U7U6 -> mm7    
 
 	psrad   $FIXPSHIFT, %mm7       #32-bit scaled U7U6 -> mm7
-	paddw	OFFSETY, %mm2
+	paddw	MUNG(OFFSETY), %mm2
 	movq	%mm2, (%ebx)    #store Y7Y6Y5Y4 
 
-	movq	ALLONE, %mm6
+	movq	MUNG(ALLONE), %mm6
 	packssdw %mm7, %mm0     #32-bit scaled U7U6U5U4 -> mm0
 
-	movq    TEMPU, %mm4     #32-bit scaled U3U2U1U0 -> mm4
+	movq    MUNG(TEMPU), %mm4     #32-bit scaled U3U2U1U0 -> mm4
 	pmaddwd	%mm6, %mm0      #U7U6U5U4 averaged -> (U7U6)(U5U4)=UU3 UU2->mm0
 	
 	pmaddwd	%mm6, %mm4      #U3U2U1U0 averaged -> (U3U2)(U1U0)=UU1 UU0->mm4
@@ -338,8 +340,8 @@ rgbtoycb_mmx_loop: 
 
 	psrad   $FIXPSHIFT, %mm1       #32-bit scaled V7V6 -> mm1
 	psraw	$1, %mm4 	#divide UU3 UU2 UU1 UU0 by 2 -> mm4
-		
-	movq    TEMPV, %mm5     #32-bit scaled V3V2V1V0 -> mm5
+
+	movq    MUNG(TEMPV), %mm5     #32-bit scaled V3V2V1V0 -> mm5
 
 	movq	%mm4, (%ecx)    # store U	
 
@@ -422,14 +426,15 @@ _dv_ppm_copy_y_block_mmx:
 _dv_pgm_copy_y_block_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
-	movq	OFFSETY, %mm7
+	LOAD_PIC_REG(bp)
+
+	movl    16(%esp), %edi          # dest
+	movl    20(%esp), %esi         # src
+
+	movq	MUNG(OFFSETY), %mm7
 	pxor	%mm6, %mm6
 	
 	movq	(%esi), %mm0
@@ -564,14 +571,15 @@ _dv_pgm_copy_y_block_mmx:
 _dv_video_copy_y_block_mmx:
 
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
-	movq	OFFSETBX, %mm7
+	LOAD_PIC_REG(bp)
+
+	movl    16(%esp), %edi          # dest
+	movl    20(%esp), %esi         # src
+
+	movq	MUNG(OFFSETBX), %mm7
 	pxor	%mm6, %mm6
 	
 	movq	(%esi), %mm0
@@ -852,16 +864,16 @@ _dv_ppm_copy_pal_c_block_mmx:
 _dv_pgm_copy_pal_c_block_mmx:
 				
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
 	pushl	%ebx
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
+	LOAD_PIC_REG(bp)
+
+	movl    20(%esp), %edi          # dest
+	movl    24(%esp), %esi         # src
 
-	movq	OFFSETBX, %mm7
+	movq	MUNG(OFFSETBX), %mm7
 	pxor	%mm6, %mm6
 
 	
@@ -1000,15 +1014,16 @@ _dv_pgm_copy_pal_c_block_mmx:
 _dv_video_copy_pal_c_block_mmx:
 				
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
 	pushl	%ebx
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
-	movq	OFFSETBX, %mm7
+	LOAD_PIC_REG(bp)
+
+	movl    20(%esp), %edi          # dest
+	movl    24(%esp), %esi         # src
+
+	movq	MUNG(OFFSETBX), %mm7
 	paddw	%mm7, %mm7
 	pxor	%mm6, %mm6
 
@@ -1095,18 +1112,18 @@ video_copy_pal_c_block_mmx_loop:	
 _dv_ppm_copy_ntsc_c_block_mmx:
 				
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
 	pushl	%ebx
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
+
+	LOAD_PIC_REG(bp)
+
+	movl    20(%esp), %edi          # dest
+	movl    24(%esp), %esi         # src
 
 	movl	$4, %ebx	
 
-	movq	ALLONE, %mm6
-	
+	movq	MUNG(ALLONE), %mm6
 ppm_copy_ntsc_c_block_mmx_loop:	
 	
 	movq	(%esi), %mm0
@@ -1168,14 +1187,15 @@ ppm_copy_ntsc_c_block_mmx_loop:	
 _dv_pgm_copy_ntsc_c_block_mmx:
 				
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
-	movq	OFFSETBX, %mm7
+	LOAD_PIC_REG(bp)
+
+	movl    16(%esp), %edi          # dest
+	movl    20(%esp), %esi         # src
+
+	movq	MUNG(OFFSETBX), %mm7
 	paddw	%mm7, %mm7
 	pxor	%mm6, %mm6
 
@@ -1325,15 +1347,16 @@ _dv_pgm_copy_ntsc_c_block_mmx:
 _dv_video_copy_ntsc_c_block_mmx:
 				
 	pushl   %ebp
-	movl    %esp, %ebp
 	pushl   %esi
 	pushl	%edi
 	pushl	%ebx
-	
-	movl    8(%ebp), %edi          # dest
-	movl    12(%ebp), %esi         # src
 
-	movq	OFFSETBX, %mm7
+	LOAD_PIC_REG(bp)
+
+	movl    20(%esp), %edi          # dest
+	movl    24(%esp), %esi         # src
+
+	movq	MUNG(OFFSETBX), %mm7
 	paddw	%mm7, %mm7
 	pxor	%mm6, %mm6
 
--- libdv-0.104-old/libdv/rgbtoyuv_x86_64.S
+++ libdv-0.104/libdv/rgbtoyuv_x86_64.S
@@ -41,9 +41,6 @@
 #define DV_WIDTH_SHORT_HALF 720
 #define DV_WIDTH_BYTE_HALF  360	
 		
-.global _dv_rgbtoycb_mmx_x86_64
-# .global yuvtoycb_mmx_x86_64
-
 .data
 
 .align 8
--- libdv-0.104-old/libdv/vlc_x86.S
+++ libdv-0.104/libdv/vlc_x86.S
@@ -1,31 +1,39 @@
 	#include "asmoff.h"
 .section .note.GNU-stack, "", @progbits
+	#include "asm_common.S"
 
 .text
 	.align 4
 .globl dv_decode_vlc 
+.globl asm_dv_decode_vlc 
+.hidden asm_dv_decode_vlc
+asm_dv_decode_vlc = dv_decode_vlc
+
 	.type	 dv_decode_vlc,@function
 dv_decode_vlc:
 	pushl %ebx
+	pushl %ebp
+
+	LOAD_PIC_REG(bp)
 
-	/* Args are at 8(%esp). */
-	movl  8(%esp),%eax		/* %eax is bits */
-	movl  12(%esp),%ebx		/* %ebx is maxbits */
+	/* Args are at 12(%esp). */
+	movl  12(%esp),%eax		/* %eax is bits */
+	movl  16(%esp),%ebx		/* %ebx is maxbits */
 	andl  $0x3f,%ebx		/* limit index range STL*/
 
-	movl  dv_vlc_class_index_mask(,%ebx,4),%edx
+	movl  MUNG_ARR(dv_vlc_class_index_mask,%ebx,4),%edx
 	andl  %eax,%edx
-	movl  dv_vlc_class_index_rshift(,%ebx,4),%ecx
+	movl  MUNG_ARR(dv_vlc_class_index_rshift,%ebx,4),%ecx
 	sarl  %cl,%edx
-	movl  dv_vlc_classes(,%ebx,4),%ecx
+	movl  MUNG_ARR(dv_vlc_classes,%ebx,4),%ecx
 	movsbl  (%ecx,%edx,1),%edx	/* %edx is class */
 			
-	movl  dv_vlc_index_mask(,%edx,4),%ebx
-	movl  dv_vlc_index_rshift(,%edx,4),%ecx
+	movl  MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
+	movl  MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
 	andl  %eax,%ebx
 	sarl  %cl,%ebx
 
-	movl  dv_vlc_lookups(,%edx,4),%edx
+	movl  MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
 	movl  (%edx,%ebx,4),%edx
 
 	/* Now %edx holds result, like this:
@@ -42,7 +51,7 @@ dv_decode_vlc:
 	movl  %edx,%ecx
 	sarl  $8,%ecx
 	andl  $0xff,%ecx
-	movl  sign_mask(,%ecx,4),%ebx
+	movl  MUNG_ARR(sign_mask,%ecx,4),%ebx
 	andl  %ebx,%eax
 	negl  %eax
 	sarl  $31,%eax
@@ -63,14 +72,14 @@ dv_decode_vlc:
 	    *result = broken;
 	Note that the 'broken' pattern is all ones (i.e. 0xffffffff)
 	*/
-	movl  12(%esp),%ebx		/* %ebx is maxbits */
+	movl  16(%esp),%ebx		/* %ebx is maxbits */
 	subl  %ecx,%ebx
 	sbbl  %ebx,%ebx
 	orl   %ebx,%edx
 
-	movl  16(%esp),%eax
+	movl  20(%esp),%eax
 	movl  %edx,(%eax)
-	
+	popl  %ebp
 	popl  %ebx
 	ret
 	
@@ -80,21 +89,28 @@ dv_decode_vlc:
 	.type	 __dv_decode_vlc,@function
 __dv_decode_vlc:
 	pushl %ebx
+	pushl %ebp
+
+	LOAD_PIC_REG(bp)
 
-	/* Args are at 8(%esp). */
-	movl  8(%esp),%eax		/* %eax is bits */
+	/* Args are at 12(%esp). */
+	movl  12(%esp),%eax		/* %eax is bits */
 	
 	movl  %eax,%edx			/* %edx is class */
 	andl  $0xfe00,%edx
 	sarl  $9,%edx
+#ifdef __PIC__
+	movsbl dv_vlc_class_lookup5@GOTOFF(%ebp,%edx),%edx
+#else
 	movsbl dv_vlc_class_lookup5(%edx),%edx
-	
-	movl  dv_vlc_index_mask(,%edx,4),%ebx
-	movl  dv_vlc_index_rshift(,%edx,4),%ecx
+#endif
+
+	movl  MUNG_ARR(dv_vlc_index_mask,%edx,4),%ebx
+	movl  MUNG_ARR(dv_vlc_index_rshift,%edx,4),%ecx
 	andl  %eax,%ebx
 	sarl  %cl,%ebx
 
-	movl  dv_vlc_lookups(,%edx,4),%edx
+	movl  MUNG_ARR(dv_vlc_lookups,%edx,4),%edx
 	movl  (%edx,%ebx,4),%edx
 
 	/* Now %edx holds result, like this:
@@ -112,7 +128,7 @@ __dv_decode_vlc:
 	movl  %edx,%ecx
 	sarl  $8,%ecx
 	andl  $0xff,%ecx
-	movl  sign_mask(,%ecx,4),%ecx
+	movl  MUNG_ARR(sign_mask,%ecx,4),%ecx
 	andl  %ecx,%eax
 	negl  %eax
 	sarl  $31,%eax
@@ -127,9 +143,9 @@ __dv_decode_vlc:
 	xorl  %eax,%edx
 	subl  %eax,%edx
 
-	movl  12(%esp),%eax
+	movl  16(%esp),%eax
 	movl  %edx,(%eax)
-	
+	popl  %ebp
 	popl  %ebx
 	ret
 
@@ -140,14 +156,20 @@ void dv_parse_ac_coeffs_pass0(bitstream_
 */
 .text
 	.align	4
+.globl asm_dv_parse_ac_coeffs_pass0
+.hidden asm_dv_parse_ac_coeffs_pass0
+	asm_dv_parse_ac_coeffs_pass0 = dv_parse_ac_coeffs_pass0
+
 .globl	dv_parse_ac_coeffs_pass0
 .type	dv_parse_ac_coeffs_pass0,@function
 dv_parse_ac_coeffs_pass0:
 	pushl	%ebx
 	pushl	%edi
 	pushl	%esi
 	pushl	%ebp
 
+	LOAD_PIC_REG(si)
+
 #define ARGn(N)  (20+(4*(N)))(%esp)
 
 	/*
@@ -159,8 +182,10 @@ dv_parse_ac_coeffs_pass0:
 	ebp	bl
 	*/
 	movl    ARGn(2),%ebp
+#ifndef __PIC__
 	movl	ARGn(0),%esi
 	movl	bitstream_t_buf(%esi),%esi
+#endif
 	movl	dv_block_t_offset(%ebp),%edi
 	movl	dv_block_t_reorder(%ebp),%ebx
 
@@ -170,7 +195,11 @@ dv_parse_ac_coeffs_pass0:
 	
 	movq    dv_block_t_coeffs(%ebp),%mm1
 	pxor    %mm0,%mm0
+#ifdef __PIC__
+	pand    const_f_0_0_0@GOTOFF(%esi),%mm1
+#else
 	pand    const_f_0_0_0,%mm1
+#endif
 	movq    %mm1,dv_block_t_coeffs(%ebp)
 	movq    %mm0,(dv_block_t_coeffs + 8)(%ebp)
 	movq    %mm0,(dv_block_t_coeffs + 16)(%ebp)
@@ -191,9 +220,17 @@ dv_parse_ac_coeffs_pass0:
 readloop:
 	movl	%edi,%ecx
 	shrl	$3,%ecx
+#ifdef __PIC__
+	movl    ARGn(0),%eax
+	addl    bitstream_t_buf(%eax),%ecx
+	movzbl  (%ecx),%eax
+	movzbl  1(%ecx),%edx
+	movzbl  2(%ecx),%ecx
+#else
 	movzbl  (%esi,%ecx,1),%eax
 	movzbl  1(%esi,%ecx,1),%edx
 	movzbl  2(%esi,%ecx,1),%ecx
+#endif
 	shll	$16,%eax
 	shll	$8,%edx
 	orl	%ecx,%eax
@@ -217,7 +254,11 @@ readloop:
 
 	/* Attempt to use the shortcut first.  If it hits, then
 	   this vlc term has been decoded. */
+#ifdef __PIC__
+	movl	dv_vlc_class1_shortcut@GOTOFF(%esi,%ecx,4),%edx
+#else
 	movl	dv_vlc_class1_shortcut(,%ecx,4),%edx
+#endif
 	test	$0x80,%edx
 	je	done_decode
 
@@ -228,12 +269,19 @@ readloop:
 	movl	%ebx,dv_block_t_reorder(%ebp)
 
 	/* %eax is bits */
-	
+#ifdef __PIC__
+	movsbl dv_vlc_class_lookup5@GOTOFF(%esi,%ecx),%ecx
+
+	movl  dv_vlc_index_mask@GOTOFF(%esi,%ecx,4),%ebx
+	movl  dv_vlc_lookups@GOTOFF(%esi,%ecx,4),%edx
+	movl  dv_vlc_index_rshift@GOTOFF(%esi,%ecx,4),%ecx
+#else
 	movsbl dv_vlc_class_lookup5(%ecx),%ecx
 
 	movl  dv_vlc_index_mask(,%ecx,4),%ebx
 	movl  dv_vlc_lookups(,%ecx,4),%edx
 	movl  dv_vlc_index_rshift(,%ecx,4),%ecx
+#endif
 	andl  %eax,%ebx
 	sarl  %cl,%ebx
 
@@ -256,7 +304,11 @@ readloop:
 	movl  %edx,%ecx
 	sarl  $8,%ecx
 	andl  $0xff,%ecx
+#ifdef __PIC__
+	movl  sign_mask@GOTOFF(%esi,%ecx,4),%ecx
+#else
 	movl  sign_mask(,%ecx,4),%ecx
+#endif
 	andl  %ecx,%eax
 	negl  %eax
 	sarl  $31,%eax
@@ -326,10 +378,16 @@ alldone:
 
 slowpath:
 	/* slow path:	 use dv_decode_vlc */;
+#ifdef __PIC__
+	pushl	%esi
+	leal	vlc@GOTOFF(%esi),%esi
+	xchgl	%esi,(%esp)	/* last parameter is &vlc */
+#else
 	pushl	$vlc		/* last parameter is &vlc */
+#endif
 	pushl	%edx		/* bits_left */
 	pushl	%eax		/* bits */
-	call	dv_decode_vlc
+	call	asm_dv_decode_vlc
 	addl	$12,%esp
 	test	$0x80,%edx	/* If (vlc.run < 0) break */
 	jne	escape
@@ -359,6 +417,8 @@ show16:
 	pushl	%esi
 	pushl	%ebp
 
+	LOAD_PIC_REG(si)
+
 #define ARGn(N)  (20+(4*(N)))(%esp)
 
 	movl	ARGn(1),%eax			/* quality */
@@ -373,7 +434,11 @@ dv_parse_video_segment:
 	jz	its_mono
 	movl	$6,%ebx
 its_mono:
+#ifdef __PIC__
+	movl	%ebx,n_blocks@GOTOFF(%esi)
+#else
 	movl	%ebx,n_blocks
+#endif
 	
 	/*
 	 *	ebx	seg/b
@@ -384,15 +449,22 @@ its_mono:
 	 *	ebp	bl
 	 */
 	movl	ARGn(0),%ebx
+#ifndef __PIC__
 	movl	dv_videosegment_t_bs(%ebx),%esi
 	movl	bitstream_t_buf(%esi),%esi
+#endif
 	leal	dv_videosegment_t_mb(%ebx),%edi
 
 	movl	$0,%eax
 	movl	$0,%ecx
 macloop:
+#ifdef __PIC__
+	movl	%eax,m@GOTOFF(%esi)
+	movl	%ecx,mb_start@GOTOFF(%esi)
+#else
 	movl	%eax,m
 	movl	%ecx,mb_start
+#endif
 
 	movl	ARGn(0),%ebx
 	
@@ -400,7 +472,13 @@ macloop:
 	/* mb->qno = bitstream_get(bs,4); */
 	movl	%ecx,%edx
 	shr	$3,%edx
+#ifdef __PIC__
+	movl	dv_videosegment_t_bs(%ebx),%ecx
+	movl	bitstream_t_buf(%ecx),%ecx
+	movzbl	3(%ecx,%edx,1),%edx
+#else
 	movzbl	3(%esi,%edx,1),%edx
+#endif
 	andl	$0xf,%edx
 	movl	%edx,dv_macroblock_t_qno(%edi)
 
@@ -411,7 +489,11 @@ macloop:
 	movl	%edx,dv_macroblock_t_eob_count(%edi)
 
 	/* mb->i = (seg->i + dv_super_map_vertical[m]) % (seg->isPAL?12:10); */
+#ifdef __PIC__
+	movl	dv_super_map_vertical@GOTOFF(%esi,%eax,4),%edx
+#else
 	movl	dv_super_map_vertical(,%eax,4),%edx
+#endif
 	movl	dv_videosegment_t_i(%ebx),%ecx
 	addl	%ecx,%edx
 
@@ -422,11 +504,20 @@ skarly:	
 	andl	$1,%ecx
 	shll	$5,%ecx		/* ecx = (isPAL ? 32 : 0) */
 
+#ifdef __PIC__
+	leal	mod_10@GOTOFF(%esi),%edx
+	movzbl	(%edx,%ecx,1),%edx	/* uses mod_12 for PAL */
+#else
 	movzbl	mod_10(%edx,%ecx,1),%edx	/* uses mod_12 for PAL */
+#endif
 	movl	%edx,dv_macroblock_t_i(%edi)
 
 	/*  mb->j = dv_super_map_horizontal[m]; */	
+#ifdef __PIC__
+	movl	dv_super_map_horizontal@GOTOFF(%esi,%eax,4),%edx
+#else
 	movl	dv_super_map_horizontal(,%eax,4),%edx
+#endif
 	movl	%edx,dv_macroblock_t_j(%edi)
 
 	/* mb->k = seg->k; */
@@ -445,12 +536,28 @@ blkloop:
 	        +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
 	*/
 	/* dc = bitstream_get(bs,9); */
+#ifdef __PIC__
+	movl	mb_start@GOTOFF(%esi),%ecx
+#else
 	movl	mb_start,%ecx
+#endif
 	shr	$3,%ecx
+#ifdef __PIC__
+	movzbl	blk_start@GOTOFF(%esi,%ebx),%edx
+#else
 	movzbl	blk_start(%ebx),%edx
+#endif
 	addl	%ecx,%edx
+#ifdef __PIC__
+	movl	ARGn(0),%ecx
+	movl	dv_videosegment_t_bs(%ecx),%ecx
+	movl	bitstream_t_buf(%ecx),%ecx
+	movzbl	(%ecx,%edx,1),%eax	/* hi byte */
+	movzbl	1(%ecx,%edx,1),%ecx	/* lo byte */
+#else
 	movzbl	(%esi,%edx,1),%eax	/* hi byte */
 	movzbl	1(%esi,%edx,1),%ecx	/* lo byte */
+#endif
 	shll	$8,%eax
 	orl	%ecx,%eax
 
@@ -477,7 +584,11 @@ blkloop:
 
 	/* bl->reorder = &dv_reorder[bl->dct_mode][1]; */
 	shll	$6,%eax
+#ifdef __PIC__
+	leal	dv_reorder@GOTOFF+1(%esi,%eax),%eax
+#else
 	addl	$(dv_reorder+1),%eax
+#endif
 	movl	%eax,dv_block_t_reorder(%ebp)
 
 	/* bl->reorder_sentinel = bl->reorder + 63; */
@@ -485,13 +596,22 @@ blkloop:
 	movl	%eax,dv_block_t_reorder_sentinel(%ebp)
 
 	/* bl->offset= mb_start + dv_parse_bit_start[b]; */
+#ifdef __PIC__
+	movl	mb_start@GOTOFF(%esi),%ecx
+	movl	dv_parse_bit_start@GOTOFF(%esi,%ebx,4),%eax
+#else
 	movl	mb_start,%ecx
 	movl	dv_parse_bit_start(,%ebx,4),%eax
+#endif
 	addl	%ecx,%eax
 	movl	%eax,dv_block_t_offset(%ebp)
 
 	/* bl->end= mb_start + dv_parse_bit_end[b]; */
+#ifdef __PIC__
+	movl	dv_parse_bit_end@GOTOFF(%esi,%ebx,4),%eax
+#else
 	movl	dv_parse_bit_end(,%ebx,4),%eax
+#endif
 	addl	%ecx,%eax
 	movl	%eax,dv_block_t_end(%ebp)
 
@@ -503,7 +623,11 @@ blkloop:
 	/* no AC pass.  Just zero out the remaining coeffs */
 	movq    dv_block_t_coeffs(%ebp),%mm1
 	pxor    %mm0,%mm0
+#ifdef __PIC__
+	pand    const_f_0_0_0@GOTOFF(%esi),%mm1
+#else
 	pand    const_f_0_0_0,%mm1
+#endif
 	movq    %mm1,dv_block_t_coeffs(%ebp)
 	movq    %mm0,(dv_block_t_coeffs + 8)(%ebp)
 	movq    %mm0,(dv_block_t_coeffs + 16)(%ebp)
@@ -528,18 +652,27 @@ do_ac_pass:
 	pushl	%ebp
 	pushl	%edi
 	pushl	%eax
-	call	dv_parse_ac_coeffs_pass0
+	call	asm_dv_parse_ac_coeffs_pass0
 	addl	$12,%esp
 done_ac:
 
+#ifdef __PIC__
+	movl	n_blocks@GOTOFF(%esi),%eax
+#else
 	movl	n_blocks,%eax
+#endif
 	addl	$dv_block_t_size,%ebp
 	incl	%ebx
 	cmpl	%eax,%ebx
 	jnz	blkloop
 
+#ifdef __PIC__
+	movl	m@GOTOFF(%esi),%eax
+	movl	mb_start@GOTOFF(%esi),%ecx
+#else
 	movl	m,%eax
 	movl	mb_start,%ecx
+#endif
 	addl	$(8 * 80),%ecx
 	addl	$dv_macroblock_t_size,%edi
 	incl	%eax
@@ -557,7 +690,7 @@ done_ac:
 
 	andl	$DV_QUALITY_AC_MASK,%eax
 	cmpl	$DV_QUALITY_AC_2,%eax
-	jz	dv_parse_ac_coeffs
+	jz	asm_dv_parse_ac_coeffs
 	movl	$0,%eax
 	ret
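
For completeness, the MUNG_ARR() form used throughout vlc_x86.S merges the PIC base with the original index: sym(,%reg,4) becomes sym@GOTOFF(%ebp,%reg,4) once %ebp holds the GOT pointer. A final sketch with invented names, combining the thunk from asm_common.S with a scaled table access:

	.data
demo_table:	.long 10, 20, 30, 40

	.text
	.globl	demo_indexed
	.type	demo_indexed, @function
demo_indexed:				/* int demo_indexed(int i) -- returns demo_table[i] */
	pushl	%ebp
	call	.Lthunk_bp
	addl	$_GLOBAL_OFFSET_TABLE_, %ebp	/* %ebp -> GOT */
	movl	8(%esp), %ecx			/* i: 4 (ret) + 4 (%ebp) */
	movl	demo_table@GOTOFF(%ebp,%ecx,4), %eax	/* MUNG_ARR expansion */
	popl	%ebp
	ret
	.size	demo_indexed, .-demo_indexed
.Lthunk_bp:
	movl	(%esp), %ebp
	ret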