|
 |
83c29f |
diff --git a/Configure b/Configure
|
|
 |
83c29f |
index 9c803dc..5a5c2d8 100755
|
|
 |
83c29f |
--- a/Configure
|
|
 |
83c29f |
+++ b/Configure
|
|
 |
83c29f |
@@ -139,8 +139,8 @@ my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes
|
|
 |
83c29f |
my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
|
|
 |
83c29f |
my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
|
|
 |
83c29f |
my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
|
|
 |
83c29f |
-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
|
|
 |
83c29f |
-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
|
|
 |
83c29f |
+my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:";
|
|
 |
83c29f |
+my $ppc32_asm=$ppc64_asm;
|
|
 |
83c29f |
my $no_asm=":::::::::::::::void";
|
|
 |
83c29f |
|
|
 |
83c29f |
# As for $BSDthreads. Idea is to maintain "collective" set of flags,
|
|
 |
83c29f |
@@ -357,6 +357,7 @@ my %table=(
|
|
 |
83c29f |
####
|
|
 |
83c29f |
"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
|
 |
83c29f |
"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
|
|
 |
83c29f |
+"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
|
|
 |
83c29f |
"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
|
 |
83c29f |
"linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
|
 |
83c29f |
"linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
|
|
 |
83c29f |
@@ -462,8 +463,8 @@ my %table=(
|
|
 |
83c29f |
|
|
 |
83c29f |
#### IBM's AIX.
|
|
 |
83c29f |
"aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::",
|
|
 |
83c29f |
-"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32",
|
|
 |
83c29f |
-"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:${ppc64_asm}:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64",
|
|
 |
83c29f |
+"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:$ppc32_asm:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32",
|
|
 |
83c29f |
+"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:$ppc64_asm:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64",
|
|
 |
83c29f |
# Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE
|
|
 |
83c29f |
# at build time. $OBJECT_MODE is respected at ./config stage!
|
|
 |
83c29f |
"aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded -D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-q32 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
|
|
 |
83c29f |
@@ -1525,7 +1526,7 @@ else {
|
|
 |
83c29f |
$wp_obj="wp_block.o";
|
|
 |
83c29f |
}
|
|
 |
83c29f |
$cmll_obj=$cmll_enc unless ($cmll_obj =~ /.o$/);
|
|
 |
83c29f |
-if ($modes_obj =~ /ghash/)
|
|
 |
83c29f |
+if ($modes_obj =~ /ghash\-/)
|
|
 |
83c29f |
{
|
|
 |
83c29f |
$cflags.=" -DGHASH_ASM";
|
|
 |
83c29f |
}
|
|
 |
83c29f |
diff --git a/config b/config
|
|
 |
83c29f |
index 88b9bc6..8b80802 100755
|
|
 |
83c29f |
--- a/config
|
|
 |
83c29f |
+++ b/config
|
|
 |
83c29f |
@@ -587,13 +587,20 @@ case "$GUESSOS" in
|
|
 |
83c29f |
fi
|
|
 |
83c29f |
;;
|
|
 |
83c29f |
ppc64-*-linux2)
|
|
 |
83c29f |
- echo "WARNING! If you wish to build 64-bit library, then you have to"
|
|
 |
83c29f |
- echo " invoke './Configure linux-ppc64' *manually*."
|
|
 |
83c29f |
- if [ "$TEST" = "false" -a -t 1 ]; then
|
|
 |
83c29f |
- echo " You have about 5 seconds to press Ctrl-C to abort."
|
|
 |
83c29f |
- (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
|
|
 |
83c29f |
+ if [ -z "$KERNEL_BITS" ]; then
|
|
 |
83c29f |
+ echo "WARNING! If you wish to build 64-bit library, then you have to"
|
|
 |
83c29f |
+ echo " invoke './Configure linux-ppc64' *manually*."
|
|
 |
83c29f |
+ if [ "$TEST" = "false" -a -t 1 ]; then
|
|
 |
83c29f |
+ echo " You have about 5 seconds to press Ctrl-C to abort."
|
|
 |
83c29f |
+ (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
|
|
 |
83c29f |
+ fi
|
|
 |
83c29f |
+ fi
|
|
 |
83c29f |
+ if [ "$KERNEL_BITS" = "64" ]; then
|
|
 |
83c29f |
+ OUT="linux-ppc64"
|
|
 |
83c29f |
+ else
|
|
 |
83c29f |
+ OUT="linux-ppc"
|
|
 |
83c29f |
+ (echo "__LP64__" | gcc -E -x c - 2>/dev/null | grep "^__LP64__" > /dev/null 2>&1) || options="$options -m32" # add -m32 only when gcc defaults to LP64; redirect stdout first so grep's stderr is silenced too
|
|
 |
83c29f |
fi
|
|
 |
83c29f |
- OUT="linux-ppc"
|
|
 |
83c29f |
;;
|
|
 |
83c29f |
ppc-*-linux2) OUT="linux-ppc" ;;
|
|
 |
83c29f |
ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;;
|
|
 |
83c29f |
diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile
|
|
 |
83c29f |
index 45ede0a..847f4ee 100644
|
|
 |
83c29f |
--- a/crypto/aes/Makefile
|
|
 |
83c29f |
+++ b/crypto/aes/Makefile
|
|
 |
83c29f |
@@ -71,6 +71,10 @@ aes-sparcv9.s: asm/aes-sparcv9.pl
|
|
 |
83c29f |
|
|
 |
83c29f |
aes-ppc.s: asm/aes-ppc.pl
|
|
 |
83c29f |
$(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@
|
|
 |
83c29f |
+vpaes-ppc.s: asm/vpaes-ppc.pl
|
|
 |
83c29f |
+ $(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@
|
|
 |
83c29f |
+aesp8-ppc.s: asm/aesp8-ppc.pl
|
|
 |
83c29f |
+ $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@
|
|
 |
83c29f |
|
|
 |
83c29f |
aes-parisc.s: asm/aes-parisc.pl
|
|
 |
83c29f |
$(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@
|
|
 |
83c29f |
diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl
|
|
 |
83c29f |
index 7c52cbe..7a99fc3 100644
|
|
 |
83c29f |
--- a/crypto/aes/asm/aes-ppc.pl
|
|
 |
83c29f |
+++ b/crypto/aes/asm/aes-ppc.pl
|
|
 |
83c29f |
@@ -45,6 +45,8 @@ if ($flavour =~ /64/) {
|
|
 |
83c29f |
$PUSH ="stw";
|
|
 |
83c29f |
} else { die "nonsense $flavour"; }
|
|
 |
83c29f |
|
|
 |
83c29f |
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
|
|
 |
83c29f |
+
|
|
 |
83c29f |
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|
 |
83c29f |
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
|
|
 |
83c29f |
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
|
|
 |
83c29f |
@@ -68,7 +70,7 @@ $key="r5";
|
|
 |
83c29f |
$Tbl0="r3";
|
|
 |
83c29f |
$Tbl1="r6";
|
|
 |
83c29f |
$Tbl2="r7";
|
|
 |
83c29f |
-$Tbl3="r2";
|
|
 |
83c29f |
+$Tbl3=$out; # stay away from "r2"; $out is offloaded to stack
|
|
 |
83c29f |
|
|
 |
83c29f |
$s0="r8";
|
|
 |
83c29f |
$s1="r9";
|
|
 |
83c29f |
@@ -76,7 +78,7 @@ $s2="r10";
|
|
 |
83c29f |
$s3="r11";
|
|
 |
83c29f |
|
|
 |
83c29f |
$t0="r12";
|
|
 |
83c29f |
-$t1="r13";
|
|
 |
83c29f |
+$t1="r0"; # stay away from "r13";
|
|
 |
83c29f |
$t2="r14";
|
|
 |
83c29f |
$t3="r15";
|
|
 |
83c29f |
|
|
 |
83c29f |
@@ -100,9 +102,6 @@ $acc13="r29";
|
|
 |
83c29f |
$acc14="r30";
|
|
 |
83c29f |
$acc15="r31";
|
|
 |
83c29f |
|
|
 |
83c29f |
-# stay away from TLS pointer
|
|
 |
83c29f |
-if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; }
|
|
 |
83c29f |
-else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
|
|
 |
83c29f |
$mask80=$Tbl2;
|
|
 |
83c29f |
$mask1b=$Tbl3;
|
|
 |
83c29f |
|
|
 |
83c29f |
@@ -337,8 +336,7 @@ $code.=<<___;
|
|
 |
83c29f |
$STU $sp,-$FRAME($sp)
|
|
 |
83c29f |
mflr r0
|
|
 |
83c29f |
|
|
 |
83c29f |
- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
|
|
 |
83c29f |
- $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
|
|
 |
83c29f |
+ $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
|
|
 |
83c29f |
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
|
|
 |
83c29f |
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
|
|
 |
83c29f |
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
|
|
 |
83c29f |
@@ -365,16 +363,61 @@ $code.=<<___;
|
|
 |
83c29f |
bne Lenc_unaligned
|
|
 |
83c29f |
|
|
 |
83c29f |
Lenc_unaligned_ok:
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___ if (!$LITTLE_ENDIAN);
|
|
 |
83c29f |
lwz $s0,0($inp)
|
|
 |
83c29f |
lwz $s1,4($inp)
|
|
 |
83c29f |
lwz $s2,8($inp)
|
|
 |
83c29f |
lwz $s3,12($inp)
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___ if ($LITTLE_ENDIAN);
|
|
 |
83c29f |
+ lwz $t0,0($inp)
|
|
 |
83c29f |
+ lwz $t1,4($inp)
|
|
 |
83c29f |
+ lwz $t2,8($inp)
|
|
 |
83c29f |
+ lwz $t3,12($inp)
|
|
 |
83c29f |
+ rotlwi $s0,$t0,8
|
|
 |
83c29f |
+ rotlwi $s1,$t1,8
|
|
 |
83c29f |
+ rotlwi $s2,$t2,8
|
|
 |
83c29f |
+ rotlwi $s3,$t3,8
|
|
 |
83c29f |
+ rlwimi $s0,$t0,24,0,7
|
|
 |
83c29f |
+ rlwimi $s1,$t1,24,0,7
|
|
 |
83c29f |
+ rlwimi $s2,$t2,24,0,7
|
|
 |
83c29f |
+ rlwimi $s3,$t3,24,0,7
|
|
 |
83c29f |
+ rlwimi $s0,$t0,24,16,23
|
|
 |
83c29f |
+ rlwimi $s1,$t1,24,16,23
|
|
 |
83c29f |
+ rlwimi $s2,$t2,24,16,23
|
|
 |
83c29f |
+ rlwimi $s3,$t3,24,16,23
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___;
|
|
 |
83c29f |
bl LAES_Te
|
|
 |
83c29f |
bl Lppc_AES_encrypt_compact
|
|
 |
83c29f |
+ $POP $out,`$FRAME-$SIZE_T*19`($sp)
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___ if ($LITTLE_ENDIAN);
|
|
 |
83c29f |
+ rotlwi $t0,$s0,8
|
|
 |
83c29f |
+ rotlwi $t1,$s1,8
|
|
 |
83c29f |
+ rotlwi $t2,$s2,8
|
|
 |
83c29f |
+ rotlwi $t3,$s3,8
|
|
 |
83c29f |
+ rlwimi $t0,$s0,24,0,7
|
|
 |
83c29f |
+ rlwimi $t1,$s1,24,0,7
|
|
 |
83c29f |
+ rlwimi $t2,$s2,24,0,7
|
|
 |
83c29f |
+ rlwimi $t3,$s3,24,0,7
|
|
 |
83c29f |
+ rlwimi $t0,$s0,24,16,23
|
|
 |
83c29f |
+ rlwimi $t1,$s1,24,16,23
|
|
 |
83c29f |
+ rlwimi $t2,$s2,24,16,23
|
|
 |
83c29f |
+ rlwimi $t3,$s3,24,16,23
|
|
 |
83c29f |
+ stw $t0,0($out)
|
|
 |
83c29f |
+ stw $t1,4($out)
|
|
 |
83c29f |
+ stw $t2,8($out)
|
|
 |
83c29f |
+ stw $t3,12($out)
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___ if (!$LITTLE_ENDIAN);
|
|
 |
83c29f |
stw $s0,0($out)
|
|
 |
83c29f |
stw $s1,4($out)
|
|
 |
83c29f |
stw $s2,8($out)
|
|
 |
83c29f |
stw $s3,12($out)
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___;
|
|
 |
83c29f |
b Lenc_done
|
|
 |
83c29f |
|
|
 |
83c29f |
Lenc_unaligned:
|
|
 |
83c29f |
@@ -417,6 +460,7 @@ Lenc_xpage:
|
|
 |
83c29f |
|
|
 |
83c29f |
bl LAES_Te
|
|
 |
83c29f |
bl Lppc_AES_encrypt_compact
|
|
 |
83c29f |
+ $POP $out,`$FRAME-$SIZE_T*19`($sp)
|
|
 |
83c29f |
|
|
 |
83c29f |
extrwi $acc00,$s0,8,0
|
|
 |
83c29f |
extrwi $acc01,$s0,8,8
|
|
 |
83c29f |
@@ -449,8 +493,6 @@ Lenc_xpage:
|
|
 |
83c29f |
|
|
 |
83c29f |
Lenc_done:
|
|
 |
83c29f |
$POP r0,`$FRAME+$LRSAVE`($sp)
|
|
 |
83c29f |
- $POP $toc,`$FRAME-$SIZE_T*20`($sp)
|
|
 |
83c29f |
- $POP r13,`$FRAME-$SIZE_T*19`($sp)
|
|
 |
83c29f |
$POP r14,`$FRAME-$SIZE_T*18`($sp)
|
|
 |
83c29f |
$POP r15,`$FRAME-$SIZE_T*17`($sp)
|
|
 |
83c29f |
$POP r16,`$FRAME-$SIZE_T*16`($sp)
|
|
 |
83c29f |
@@ -764,6 +806,7 @@ Lenc_compact_done:
|
|
 |
83c29f |
blr
|
|
 |
83c29f |
.long 0
|
|
 |
83c29f |
.byte 0,12,0x14,0,0,0,0,0
|
|
 |
83c29f |
+.size .AES_encrypt,.-.AES_encrypt
|
|
 |
83c29f |
|
|
 |
83c29f |
.globl .AES_decrypt
|
|
 |
83c29f |
.align 7
|
|
 |
83c29f |
@@ -771,8 +814,7 @@ Lenc_compact_done:
|
|
 |
83c29f |
$STU $sp,-$FRAME($sp)
|
|
 |
83c29f |
mflr r0
|
|
 |
83c29f |
|
|
 |
83c29f |
- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
|
|
 |
83c29f |
- $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
|
|
 |
83c29f |
+ $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
|
|
 |
83c29f |
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
|
|
 |
83c29f |
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
|
|
 |
83c29f |
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
|
|
 |
83c29f |
@@ -799,16 +841,61 @@ Lenc_compact_done:
|
|
 |
83c29f |
bne Ldec_unaligned
|
|
 |
83c29f |
|
|
 |
83c29f |
Ldec_unaligned_ok:
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___ if (!$LITTLE_ENDIAN);
|
|
 |
83c29f |
lwz $s0,0($inp)
|
|
 |
83c29f |
lwz $s1,4($inp)
|
|
 |
83c29f |
lwz $s2,8($inp)
|
|
 |
83c29f |
lwz $s3,12($inp)
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___ if ($LITTLE_ENDIAN);
|
|
 |
83c29f |
+ lwz $t0,0($inp)
|
|
 |
83c29f |
+ lwz $t1,4($inp)
|
|
 |
83c29f |
+ lwz $t2,8($inp)
|
|
 |
83c29f |
+ lwz $t3,12($inp)
|
|
 |
83c29f |
+ rotlwi $s0,$t0,8
|
|
 |
83c29f |
+ rotlwi $s1,$t1,8
|
|
 |
83c29f |
+ rotlwi $s2,$t2,8
|
|
 |
83c29f |
+ rotlwi $s3,$t3,8
|
|
 |
83c29f |
+ rlwimi $s0,$t0,24,0,7
|
|
 |
83c29f |
+ rlwimi $s1,$t1,24,0,7
|
|
 |
83c29f |
+ rlwimi $s2,$t2,24,0,7
|
|
 |
83c29f |
+ rlwimi $s3,$t3,24,0,7
|
|
 |
83c29f |
+ rlwimi $s0,$t0,24,16,23
|
|
 |
83c29f |
+ rlwimi $s1,$t1,24,16,23
|
|
 |
83c29f |
+ rlwimi $s2,$t2,24,16,23
|
|
 |
83c29f |
+ rlwimi $s3,$t3,24,16,23
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___;
|
|
 |
83c29f |
bl LAES_Td
|
|
 |
83c29f |
bl Lppc_AES_decrypt_compact
|
|
 |
83c29f |
+ $POP $out,`$FRAME-$SIZE_T*19`($sp)
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___ if ($LITTLE_ENDIAN);
|
|
 |
83c29f |
+ rotlwi $t0,$s0,8
|
|
 |
83c29f |
+ rotlwi $t1,$s1,8
|
|
 |
83c29f |
+ rotlwi $t2,$s2,8
|
|
 |
83c29f |
+ rotlwi $t3,$s3,8
|
|
 |
83c29f |
+ rlwimi $t0,$s0,24,0,7
|
|
 |
83c29f |
+ rlwimi $t1,$s1,24,0,7
|
|
 |
83c29f |
+ rlwimi $t2,$s2,24,0,7
|
|
 |
83c29f |
+ rlwimi $t3,$s3,24,0,7
|
|
 |
83c29f |
+ rlwimi $t0,$s0,24,16,23
|
|
 |
83c29f |
+ rlwimi $t1,$s1,24,16,23
|
|
 |
83c29f |
+ rlwimi $t2,$s2,24,16,23
|
|
 |
83c29f |
+ rlwimi $t3,$s3,24,16,23
|
|
 |
83c29f |
+ stw $t0,0($out)
|
|
 |
83c29f |
+ stw $t1,4($out)
|
|
 |
83c29f |
+ stw $t2,8($out)
|
|
 |
83c29f |
+ stw $t3,12($out)
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___ if (!$LITTLE_ENDIAN);
|
|
 |
83c29f |
stw $s0,0($out)
|
|
 |
83c29f |
stw $s1,4($out)
|
|
 |
83c29f |
stw $s2,8($out)
|
|
 |
83c29f |
stw $s3,12($out)
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+$code.=<<___;
|
|
 |
83c29f |
b Ldec_done
|
|
 |
83c29f |
|
|
 |
83c29f |
Ldec_unaligned:
|
|
 |
83c29f |
@@ -851,6 +938,7 @@ Ldec_xpage:
|
|
 |
83c29f |
|
|
 |
83c29f |
bl LAES_Td
|
|
 |
83c29f |
bl Lppc_AES_decrypt_compact
|
|
 |
83c29f |
+ $POP $out,`$FRAME-$SIZE_T*19`($sp)
|
|
 |
83c29f |
|
|
 |
83c29f |
extrwi $acc00,$s0,8,0
|
|
 |
83c29f |
extrwi $acc01,$s0,8,8
|
|
 |
83c29f |
@@ -883,8 +971,6 @@ Ldec_xpage:
|
|
 |
83c29f |
|
|
 |
83c29f |
Ldec_done:
|
|
 |
83c29f |
$POP r0,`$FRAME+$LRSAVE`($sp)
|
|
 |
83c29f |
- $POP $toc,`$FRAME-$SIZE_T*20`($sp)
|
|
 |
83c29f |
- $POP r13,`$FRAME-$SIZE_T*19`($sp)
|
|
 |
83c29f |
$POP r14,`$FRAME-$SIZE_T*18`($sp)
|
|
 |
83c29f |
$POP r15,`$FRAME-$SIZE_T*17`($sp)
|
|
 |
83c29f |
$POP r16,`$FRAME-$SIZE_T*16`($sp)
|
|
 |
83c29f |
@@ -1355,6 +1441,7 @@ Ldec_compact_done:
|
|
 |
83c29f |
blr
|
|
 |
83c29f |
.long 0
|
|
 |
83c29f |
.byte 0,12,0x14,0,0,0,0,0
|
|
 |
83c29f |
+.size .AES_decrypt,.-.AES_decrypt
|
|
 |
83c29f |
|
|
 |
83c29f |
.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
|
|
 |
83c29f |
.align 7
|
|
 |
83c29f |
diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
|
|
 |
83c29f |
new file mode 100755
|
|
 |
83c29f |
index 0000000..3ee8979
|
|
 |
83c29f |
--- /dev/null
|
|
 |
83c29f |
+++ b/crypto/aes/asm/aesp8-ppc.pl
|
|
 |
83c29f |
@@ -0,0 +1,1940 @@
|
|
 |
83c29f |
+#!/usr/bin/env perl
|
|
 |
83c29f |
+#
|
|
 |
83c29f |
+# ====================================================================
|
|
 |
83c29f |
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
|
 |
83c29f |
+# project. The module is, however, dual licensed under OpenSSL and
|
|
 |
83c29f |
+# CRYPTOGAMS licenses depending on where you obtain it. For further
|
|
 |
83c29f |
+# details see http://www.openssl.org/~appro/cryptogams/.
|
|
 |
83c29f |
+# ====================================================================
|
|
 |
83c29f |
+#
|
|
 |
83c29f |
+# This module implements support for AES instructions as per PowerISA
|
|
 |
83c29f |
+# specification version 2.07, first implemented by POWER8 processor.
|
|
 |
83c29f |
+# The module is endian-agnostic in sense that it supports both big-
|
|
 |
83c29f |
+# and little-endian cases. Data alignment in parallelizable modes is
|
|
 |
83c29f |
+# handled with VSX loads and stores, which implies MSR.VSX flag being
|
|
 |
83c29f |
+# set. It should also be noted that ISA specification doesn't prohibit
|
|
 |
83c29f |
+# alignment exceptions for these instructions on page boundaries.
|
|
 |
83c29f |
+# Initially alignment was handled in pure AltiVec/VMX way [when data
|
|
 |
83c29f |
+# is aligned programmatically, which in turn guarantees exception-
|
|
 |
83c29f |
+# free execution], but it turned to hamper performance when vcipher
|
|
 |
83c29f |
+# instructions are interleaved. It's reckoned that eventual
|
|
 |
83c29f |
+# misalignment penalties at page boundaries are in average lower
|
|
 |
83c29f |
+# than additional overhead in pure AltiVec approach.
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+$flavour = shift;
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+if ($flavour =~ /64/) {
|
|
 |
83c29f |
+ $SIZE_T =8;
|
|
 |
83c29f |
+ $LRSAVE =2*$SIZE_T;
|
|
 |
83c29f |
+ $STU ="stdu";
|
|
 |
83c29f |
+ $POP ="ld";
|
|
 |
83c29f |
+ $PUSH ="std";
|
|
 |
83c29f |
+ $UCMP ="cmpld";
|
|
 |
83c29f |
+ $SHL ="sldi";
|
|
 |
83c29f |
+} elsif ($flavour =~ /32/) {
|
|
 |
83c29f |
+ $SIZE_T =4;
|
|
 |
83c29f |
+ $LRSAVE =$SIZE_T;
|
|
 |
83c29f |
+ $STU ="stwu";
|
|
 |
83c29f |
+ $POP ="lwz";
|
|
 |
83c29f |
+ $PUSH ="stw";
|
|
 |
83c29f |
+ $UCMP ="cmplw";
|
|
 |
83c29f |
+ $SHL ="slwi";
|
|
 |
83c29f |
+} else { die "nonsense $flavour"; }
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|
 |
83c29f |
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
|
|
 |
83c29f |
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
|
|
 |
83c29f |
+die "can't locate ppc-xlate.pl";
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; # low-precedence 'or' so die guards open() itself, not the shifted output-file argument
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+$FRAME=8*$SIZE_T;
|
|
 |
83c29f |
+$prefix="aes_p8";
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+$sp="r1";
|
|
 |
83c29f |
+$vrsave="r12";
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+#########################################################################
|
|
 |
83c29f |
+{{{ # Key setup procedures #
|
|
 |
83c29f |
+my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
|
|
 |
83c29f |
+my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
|
|
 |
83c29f |
+my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+$code.=<<___;
|
|
 |
83c29f |
+.machine "any"
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.text
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 7
|
|
 |
83c29f |
+rcon:
|
|
 |
83c29f |
+.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
|
|
 |
83c29f |
+.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
|
|
 |
83c29f |
+.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
|
|
 |
83c29f |
+.long 0,0,0,0 ?asis
|
|
 |
83c29f |
+Lconsts:
|
|
 |
83c29f |
+ mflr r0
|
|
 |
83c29f |
+ bcl 20,31,\$+4
|
|
 |
83c29f |
+ mflr $ptr #vvvvv distance between "." and rcon
|
|
 |
83c29f |
+ addi $ptr,$ptr,-0x48
|
|
 |
83c29f |
+ mtlr r0
|
|
 |
83c29f |
+ blr
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+ .byte 0,12,0x14,0,0,0,0,0
|
|
 |
83c29f |
+.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.globl .${prefix}_set_encrypt_key
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+.${prefix}_set_encrypt_key:
|
|
 |
83c29f |
+Lset_encrypt_key:
|
|
 |
83c29f |
+ mflr r11
|
|
 |
83c29f |
+ $PUSH r11,$LRSAVE($sp)
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ li $ptr,-1
|
|
 |
83c29f |
+ ${UCMP}i $inp,0
|
|
 |
83c29f |
+ beq- Lenc_key_abort # if ($inp==0) return -1;
|
|
 |
83c29f |
+ ${UCMP}i $out,0
|
|
 |
83c29f |
+ beq- Lenc_key_abort # if ($out==0) return -1;
|
|
 |
83c29f |
+ li $ptr,-2
|
|
 |
83c29f |
+ cmpwi $bits,128
|
|
 |
83c29f |
+ blt- Lenc_key_abort
|
|
 |
83c29f |
+ cmpwi $bits,256
|
|
 |
83c29f |
+ bgt- Lenc_key_abort
|
|
 |
83c29f |
+ andi. r0,$bits,0x3f
|
|
 |
83c29f |
+ bne- Lenc_key_abort
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lis r0,0xfff0
|
|
 |
83c29f |
+ mfspr $vrsave,256
|
|
 |
83c29f |
+ mtspr 256,r0
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ bl Lconsts
|
|
 |
83c29f |
+ mtlr r11
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ neg r9,$inp
|
|
 |
83c29f |
+ lvx $in0,0,$inp
|
|
 |
83c29f |
+ addi $inp,$inp,15 # 15 is not typo
|
|
 |
83c29f |
+ lvsr $key,0,r9 # borrow $key
|
|
 |
83c29f |
+ li r8,0x20
|
|
 |
83c29f |
+ cmpwi $bits,192
|
|
 |
83c29f |
+ lvx $in1,0,$inp
|
|
 |
83c29f |
+ le?vspltisb $mask,0x0f # borrow $mask
|
|
 |
83c29f |
+ lvx $rcon,0,$ptr
|
|
 |
83c29f |
+ le?vxor $key,$key,$mask # adjust for byte swap
|
|
 |
83c29f |
+ lvx $mask,r8,$ptr
|
|
 |
83c29f |
+ addi $ptr,$ptr,0x10
|
|
 |
83c29f |
+ vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
|
|
 |
83c29f |
+ li $cnt,8
|
|
 |
83c29f |
+ vxor $zero,$zero,$zero
|
|
 |
83c29f |
+ mtctr $cnt
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ ?lvsr $outperm,0,$out
|
|
 |
83c29f |
+ vspltisb $outmask,-1
|
|
 |
83c29f |
+ lvx $outhead,0,$out
|
|
 |
83c29f |
+ ?vperm $outmask,$zero,$outmask,$outperm
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ blt Loop128
|
|
 |
83c29f |
+ addi $inp,$inp,8
|
|
 |
83c29f |
+ beq L192
|
|
 |
83c29f |
+ addi $inp,$inp,8
|
|
 |
83c29f |
+ b L256
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 4
|
|
 |
83c29f |
+Loop128:
|
|
 |
83c29f |
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$in0,12 # >>32
|
|
 |
83c29f |
+ vperm $outtail,$in0,$in0,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ vcipherlast $key,$key,$rcon
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vadduwm $rcon,$rcon,$rcon
|
|
 |
83c29f |
+ vxor $in0,$in0,$key
|
|
 |
83c29f |
+ bdnz Loop128
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx $rcon,0,$ptr # last two round keys
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$in0,12 # >>32
|
|
 |
83c29f |
+ vperm $outtail,$in0,$in0,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ vcipherlast $key,$key,$rcon
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vadduwm $rcon,$rcon,$rcon
|
|
 |
83c29f |
+ vxor $in0,$in0,$key
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$in0,12 # >>32
|
|
 |
83c29f |
+ vperm $outtail,$in0,$in0,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ vcipherlast $key,$key,$rcon
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vxor $in0,$in0,$key
|
|
 |
83c29f |
+ vperm $outtail,$in0,$in0,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ addi $inp,$out,15 # 15 is not typo
|
|
 |
83c29f |
+ addi $out,$out,0x50
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ li $rounds,10
|
|
 |
83c29f |
+ b Ldone
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 4
|
|
 |
83c29f |
+L192:
|
|
 |
83c29f |
+ lvx $tmp,0,$inp
|
|
 |
83c29f |
+ li $cnt,4
|
|
 |
83c29f |
+ vperm $outtail,$in0,$in0,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
|
|
 |
83c29f |
+ vspltisb $key,8 # borrow $key
|
|
 |
83c29f |
+ mtctr $cnt
|
|
 |
83c29f |
+ vsububm $mask,$mask,$key # adjust the mask
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Loop192:
|
|
 |
83c29f |
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$in0,12 # >>32
|
|
 |
83c29f |
+ vcipherlast $key,$key,$rcon
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vsldoi $stage,$zero,$in1,8
|
|
 |
83c29f |
+ vspltw $tmp,$in0,3
|
|
 |
83c29f |
+ vxor $tmp,$tmp,$in1
|
|
 |
83c29f |
+ vsldoi $in1,$zero,$in1,12 # >>32
|
|
 |
83c29f |
+ vadduwm $rcon,$rcon,$rcon
|
|
 |
83c29f |
+ vxor $in1,$in1,$tmp
|
|
 |
83c29f |
+ vxor $in0,$in0,$key
|
|
 |
83c29f |
+ vxor $in1,$in1,$key
|
|
 |
83c29f |
+ vsldoi $stage,$stage,$in0,8
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$in0,12 # >>32
|
|
 |
83c29f |
+ vperm $outtail,$stage,$stage,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ vcipherlast $key,$key,$rcon
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vsldoi $stage,$in0,$in1,8
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vperm $outtail,$stage,$stage,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vspltw $tmp,$in0,3
|
|
 |
83c29f |
+ vxor $tmp,$tmp,$in1
|
|
 |
83c29f |
+ vsldoi $in1,$zero,$in1,12 # >>32
|
|
 |
83c29f |
+ vadduwm $rcon,$rcon,$rcon
|
|
 |
83c29f |
+ vxor $in1,$in1,$tmp
|
|
 |
83c29f |
+ vxor $in0,$in0,$key
|
|
 |
83c29f |
+ vxor $in1,$in1,$key
|
|
 |
83c29f |
+ vperm $outtail,$in0,$in0,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+ addi $inp,$out,15 # 15 is not typo
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+ bdnz Loop192
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ li $rounds,12
|
|
 |
83c29f |
+ addi $out,$out,0x20
|
|
 |
83c29f |
+ b Ldone
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 4
|
|
 |
83c29f |
+L256:
|
|
 |
83c29f |
+ lvx $tmp,0,$inp
|
|
 |
83c29f |
+ li $cnt,7
|
|
 |
83c29f |
+ li $rounds,14
|
|
 |
83c29f |
+ vperm $outtail,$in0,$in0,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
|
|
 |
83c29f |
+ mtctr $cnt
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Loop256:
|
|
 |
83c29f |
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$in0,12 # >>32
|
|
 |
83c29f |
+ vperm $outtail,$in1,$in1,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ vcipherlast $key,$key,$rcon
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in0,$in0,$tmp
|
|
 |
83c29f |
+ vadduwm $rcon,$rcon,$rcon
|
|
 |
83c29f |
+ vxor $in0,$in0,$key
|
|
 |
83c29f |
+ vperm $outtail,$in0,$in0,$outperm # rotate
|
|
 |
83c29f |
+ vsel $stage,$outhead,$outtail,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$outtail
|
|
 |
83c29f |
+ stvx $stage,0,$out
|
|
 |
83c29f |
+ addi $inp,$out,15 # 15 is not typo
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+ bdz Ldone
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vspltw $key,$in0,3 # just splat
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$in1,12 # >>32
|
|
 |
83c29f |
+ vsbox $key,$key
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vxor $in1,$in1,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in1,$in1,$tmp
|
|
 |
83c29f |
+ vsldoi $tmp,$zero,$tmp,12 # >>32
|
|
 |
83c29f |
+ vxor $in1,$in1,$tmp
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vxor $in1,$in1,$key
|
|
 |
83c29f |
+ b Loop256
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 4
|
|
 |
83c29f |
+Ldone:
|
|
 |
83c29f |
+ lvx $in1,0,$inp # redundant in aligned case
|
|
 |
83c29f |
+ vsel $in1,$outhead,$in1,$outmask
|
|
 |
83c29f |
+ stvx $in1,0,$inp
|
|
 |
83c29f |
+ li $ptr,0
|
|
 |
83c29f |
+ mtspr 256,$vrsave
|
|
 |
83c29f |
+ stw $rounds,0($out)
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Lenc_key_abort:
|
|
 |
83c29f |
+ mr r3,$ptr
|
|
 |
83c29f |
+ blr
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+ .byte 0,12,0x14,1,0,0,3,0
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.globl .${prefix}_set_decrypt_key
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+.${prefix}_set_decrypt_key:
|
|
 |
83c29f |
+ $STU $sp,-$FRAME($sp)
|
|
 |
83c29f |
+ mflr r10
|
|
 |
83c29f |
+ $PUSH r10,$FRAME+$LRSAVE($sp)
|
|
 |
83c29f |
+ bl Lset_encrypt_key
|
|
 |
83c29f |
+ mtlr r10
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ cmpwi r3,0
|
|
 |
83c29f |
+ bne- Ldec_key_abort
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ slwi $cnt,$rounds,4
|
|
 |
83c29f |
+ subi $inp,$out,240 # first round key
|
|
 |
83c29f |
+ srwi $rounds,$rounds,1
|
|
 |
83c29f |
+ add $out,$inp,$cnt # last round key
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Ldeckey:
|
|
 |
83c29f |
+ lwz r0, 0($inp)
|
|
 |
83c29f |
+ lwz r6, 4($inp)
|
|
 |
83c29f |
+ lwz r7, 8($inp)
|
|
 |
83c29f |
+ lwz r8, 12($inp)
|
|
 |
83c29f |
+ addi $inp,$inp,16
|
|
 |
83c29f |
+ lwz r9, 0($out)
|
|
 |
83c29f |
+ lwz r10,4($out)
|
|
 |
83c29f |
+ lwz r11,8($out)
|
|
 |
83c29f |
+ lwz r12,12($out)
|
|
 |
83c29f |
+ stw r0, 0($out)
|
|
 |
83c29f |
+ stw r6, 4($out)
|
|
 |
83c29f |
+ stw r7, 8($out)
|
|
 |
83c29f |
+ stw r8, 12($out)
|
|
 |
83c29f |
+ subi $out,$out,16
|
|
 |
83c29f |
+ stw r9, -16($inp)
|
|
 |
83c29f |
+ stw r10,-12($inp)
|
|
 |
83c29f |
+ stw r11,-8($inp)
|
|
 |
83c29f |
+ stw r12,-4($inp)
|
|
 |
83c29f |
+ bdnz Ldeckey
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ xor r3,r3,r3 # return value
|
|
 |
83c29f |
+Ldec_key_abort:
|
|
 |
83c29f |
+ addi $sp,$sp,$FRAME
|
|
 |
83c29f |
+ blr
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+ .byte 0,12,4,1,0x80,0,3,0
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+}}}
|
|
 |
83c29f |
+#########################################################################
|
|
 |
83c29f |
+{{{ # Single block en- and decrypt procedures #
|
|
 |
83c29f |
+sub gen_block () { # arg: "en" or "de"; the empty prototype is only bypassed because callers use &gen_block(...)
|
|
 |
83c29f |
+my $dir = shift;
|
|
 |
83c29f |
+my $n = $dir eq "de" ? "n" : ""; # "n" turns vcipher/vcipherlast into vncipher/vncipherlast
|
|
 |
83c29f |
+my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); # r3-r5 are read as inputs; r6/r7 ($rounds,$idx) are locals
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+$code.=<<___;
|
|
 |
83c29f |
+.globl .${prefix}_${dir}crypt
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+.${prefix}_${dir}crypt:
|
|
 |
83c29f |
+ lwz $rounds,240($key)
|
|
 |
83c29f |
+ lis r0,0xfc00
|
|
 |
83c29f |
+ mfspr $vrsave,256
|
|
 |
83c29f |
+ li $idx,15 # 15 is not typo
|
|
 |
83c29f |
+ mtspr 256,r0
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx v0,0,$inp
|
|
 |
83c29f |
+ neg r11,$out
|
|
 |
83c29f |
+ lvx v1,$idx,$inp
|
|
 |
83c29f |
+ lvsl v2,0,$inp # inpperm
|
|
 |
83c29f |
+ le?vspltisb v4,0x0f
|
|
 |
83c29f |
+ ?lvsl v3,0,r11 # outperm
|
|
 |
83c29f |
+ le?vxor v2,v2,v4
|
|
 |
83c29f |
+ li $idx,16
|
|
 |
83c29f |
+ vperm v0,v0,v1,v2 # align [and byte swap in LE]
|
|
 |
83c29f |
+ lvx v1,0,$key
|
|
 |
83c29f |
+ ?lvsl v5,0,$key # keyperm
|
|
 |
83c29f |
+ srwi $rounds,$rounds,1
|
|
 |
83c29f |
+ lvx v2,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ subi $rounds,$rounds,1
|
|
 |
83c29f |
+ ?vperm v1,v1,v2,v5 # align round key
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vxor v0,v0,v1
|
|
 |
83c29f |
+ lvx v1,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Loop_${dir}c:
|
|
 |
83c29f |
+ ?vperm v2,v2,v1,v5
|
|
 |
83c29f |
+ v${n}cipher v0,v0,v2
|
|
 |
83c29f |
+ lvx v2,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ ?vperm v1,v1,v2,v5
|
|
 |
83c29f |
+ v${n}cipher v0,v0,v1
|
|
 |
83c29f |
+ lvx v1,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ bdnz Loop_${dir}c
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ ?vperm v2,v2,v1,v5
|
|
 |
83c29f |
+ v${n}cipher v0,v0,v2
|
|
 |
83c29f |
+ lvx v2,$idx,$key
|
|
 |
83c29f |
+ ?vperm v1,v1,v2,v5
|
|
 |
83c29f |
+ v${n}cipherlast v0,v0,v1
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vspltisb v2,-1
|
|
 |
83c29f |
+ vxor v1,v1,v1
|
|
 |
83c29f |
+ li $idx,15 # 15 is not typo
|
|
 |
83c29f |
+ ?vperm v2,v1,v2,v3 # outmask
|
|
 |
83c29f |
+ le?vxor v3,v3,v4
|
|
 |
83c29f |
+ lvx v1,0,$out # outhead
|
|
 |
83c29f |
+ vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
|
|
 |
83c29f |
+ vsel v1,v1,v0,v2
|
|
 |
83c29f |
+ lvx v4,$idx,$out
|
|
 |
83c29f |
+ stvx v1,0,$out
|
|
 |
83c29f |
+ vsel v0,v0,v4,v2
|
|
 |
83c29f |
+ stvx v0,$idx,$out
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ mtspr 256,$vrsave
|
|
 |
83c29f |
+ blr
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+ .byte 0,12,0x14,0,0,0,3,0
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+}
|
|
 |
83c29f |
+&gen_block("en");
|
|
 |
83c29f |
+&gen_block("de");
|
|
 |
83c29f |
+}}}
|
|
 |
83c29f |
+#########################################################################
|
|
 |
83c29f |
+{{{ # CBC en- and decrypt procedures #
|
|
 |
83c29f |
+my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); # r3-r8 are read as inputs; r9/r10 ($rounds,$idx) are locals
|
|
 |
83c29f |
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
|
|
 |
83c29f |
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
|
|
 |
83c29f |
+ map("v$_",(4..10));
|
|
 |
83c29f |
+$code.=<<___;
|
|
 |
83c29f |
+.globl .${prefix}_cbc_encrypt
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+.${prefix}_cbc_encrypt:
|
|
 |
83c29f |
+ ${UCMP}i $len,16
|
|
 |
83c29f |
+ bltlr-
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ cmpwi $enc,0 # test direction
|
|
 |
83c29f |
+ lis r0,0xffe0
|
|
 |
83c29f |
+ mfspr $vrsave,256
|
|
 |
83c29f |
+ mtspr 256,r0
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ li $idx,15
|
|
 |
83c29f |
+ vxor $rndkey0,$rndkey0,$rndkey0
|
|
 |
83c29f |
+ le?vspltisb $tmp,0x0f
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx $ivec,0,$ivp # load [unaligned] iv
|
|
 |
83c29f |
+ lvsl $inpperm,0,$ivp
|
|
 |
83c29f |
+ lvx $inptail,$idx,$ivp
|
|
 |
83c29f |
+ le?vxor $inpperm,$inpperm,$tmp
|
|
 |
83c29f |
+ vperm $ivec,$ivec,$inptail,$inpperm
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ neg r11,$inp
|
|
 |
83c29f |
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
|
|
 |
83c29f |
+ lwz $rounds,240($key)
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvsr $inpperm,0,r11 # prepare for unaligned load
|
|
 |
83c29f |
+ lvx $inptail,0,$inp
|
|
 |
83c29f |
+ addi $inp,$inp,15 # 15 is not typo
|
|
 |
83c29f |
+ le?vxor $inpperm,$inpperm,$tmp
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ ?lvsr $outperm,0,$out # prepare for unaligned store
|
|
 |
83c29f |
+ vspltisb $outmask,-1
|
|
 |
83c29f |
+ lvx $outhead,0,$out
|
|
 |
83c29f |
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
|
|
 |
83c29f |
+ le?vxor $outperm,$outperm,$tmp
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ srwi $rounds,$rounds,1
|
|
 |
83c29f |
+ li $idx,16
|
|
 |
83c29f |
+ subi $rounds,$rounds,1
|
|
 |
83c29f |
+ beq Lcbc_dec
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Lcbc_enc:
|
|
 |
83c29f |
+ vmr $inout,$inptail
|
|
 |
83c29f |
+ lvx $inptail,0,$inp
|
|
 |
83c29f |
+ addi $inp,$inp,16
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+ subi $len,$len,16 # len-=16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx $rndkey0,0,$key
|
|
 |
83c29f |
+ vperm $inout,$inout,$inptail,$inpperm
|
|
 |
83c29f |
+ lvx $rndkey1,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
 |
83c29f |
+ vxor $inout,$inout,$rndkey0
|
|
 |
83c29f |
+ lvx $rndkey0,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ vxor $inout,$inout,$ivec
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Loop_cbc_enc:
|
|
 |
83c29f |
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
 |
83c29f |
+ vcipher $inout,$inout,$rndkey1
|
|
 |
83c29f |
+ lvx $rndkey1,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
 |
83c29f |
+ vcipher $inout,$inout,$rndkey0
|
|
 |
83c29f |
+ lvx $rndkey0,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ bdnz Loop_cbc_enc
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
 |
83c29f |
+ vcipher $inout,$inout,$rndkey1
|
|
 |
83c29f |
+ lvx $rndkey1,$idx,$key
|
|
 |
83c29f |
+ li $idx,16
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
 |
83c29f |
+ vcipherlast $ivec,$inout,$rndkey0
|
|
 |
83c29f |
+ ${UCMP}i $len,16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vperm $tmp,$ivec,$ivec,$outperm
|
|
 |
83c29f |
+ vsel $inout,$outhead,$tmp,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$tmp
|
|
 |
83c29f |
+ stvx $inout,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+ bge Lcbc_enc
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ b Lcbc_done
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 4
|
|
 |
83c29f |
+Lcbc_dec:
|
|
 |
83c29f |
+ ${UCMP}i $len,128
|
|
 |
83c29f |
+ bge _aesp8_cbc_decrypt8x
|
|
 |
83c29f |
+ vmr $tmp,$inptail
|
|
 |
83c29f |
+ lvx $inptail,0,$inp
|
|
 |
83c29f |
+ addi $inp,$inp,16
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+ subi $len,$len,16 # len-=16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx $rndkey0,0,$key
|
|
 |
83c29f |
+ vperm $tmp,$tmp,$inptail,$inpperm
|
|
 |
83c29f |
+ lvx $rndkey1,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
 |
83c29f |
+ vxor $inout,$tmp,$rndkey0
|
|
 |
83c29f |
+ lvx $rndkey0,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Loop_cbc_dec:
|
|
 |
83c29f |
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
 |
83c29f |
+ vncipher $inout,$inout,$rndkey1
|
|
 |
83c29f |
+ lvx $rndkey1,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
 |
83c29f |
+ vncipher $inout,$inout,$rndkey0
|
|
 |
83c29f |
+ lvx $rndkey0,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ bdnz Loop_cbc_dec
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
 |
83c29f |
+ vncipher $inout,$inout,$rndkey1
|
|
 |
83c29f |
+ lvx $rndkey1,$idx,$key
|
|
 |
83c29f |
+ li $idx,16
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
 |
83c29f |
+ vncipherlast $inout,$inout,$rndkey0
|
|
 |
83c29f |
+ ${UCMP}i $len,16
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vxor $inout,$inout,$ivec
|
|
 |
83c29f |
+ vmr $ivec,$tmp
|
|
 |
83c29f |
+ vperm $tmp,$inout,$inout,$outperm
|
|
 |
83c29f |
+ vsel $inout,$outhead,$tmp,$outmask
|
|
 |
83c29f |
+ vmr $outhead,$tmp
|
|
 |
83c29f |
+ stvx $inout,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+ bge Lcbc_dec
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Lcbc_done:
|
|
 |
83c29f |
+ addi $out,$out,-1
|
|
 |
83c29f |
+ lvx $inout,0,$out # redundant in aligned case
|
|
 |
83c29f |
+ vsel $inout,$outhead,$inout,$outmask
|
|
 |
83c29f |
+ stvx $inout,0,$out
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ neg $enc,$ivp # write [unaligned] iv
|
|
 |
83c29f |
+ li $idx,15 # 15 is not typo
|
|
 |
83c29f |
+ vxor $rndkey0,$rndkey0,$rndkey0
|
|
 |
83c29f |
+ vspltisb $outmask,-1
|
|
 |
83c29f |
+ le?vspltisb $tmp,0x0f
|
|
 |
83c29f |
+ ?lvsl $outperm,0,$enc
|
|
 |
83c29f |
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
|
|
 |
83c29f |
+ le?vxor $outperm,$outperm,$tmp
|
|
 |
83c29f |
+ lvx $outhead,0,$ivp
|
|
 |
83c29f |
+ vperm $ivec,$ivec,$ivec,$outperm
|
|
 |
83c29f |
+ vsel $inout,$outhead,$ivec,$outmask
|
|
 |
83c29f |
+ lvx $inptail,$idx,$ivp
|
|
 |
83c29f |
+ stvx $inout,0,$ivp
|
|
 |
83c29f |
+ vsel $inout,$ivec,$inptail,$outmask
|
|
 |
83c29f |
+ stvx $inout,$idx,$ivp
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ mtspr 256,$vrsave
|
|
 |
83c29f |
+ blr
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+ .byte 0,12,0x14,0,0,0,6,0
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+#########################################################################
|
|
 |
83c29f |
+{{ # Optimized CBC decrypt procedure #
|
|
 |
83c29f |
+my $key_="r11"; # cursor into the stack copy of the aligned round keys
|
|
 |
83c29f |
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); # byte offsets 0x00..0x70; $x00 is r0, which reads as 0 in the RA slot of indexed loads/stores
|
|
 |
83c29f |
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
|
|
 |
83c29f |
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
|
|
 |
83c29f |
+my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
|
|
 |
83c29f |
+ # v26-v31 last 6 round keys
|
|
 |
83c29f |
+my ($tmp,$keyperm)=($in3,$in4); # same registers as the "caller's" $tmp (v3) and $keyperm (v10), redundant assignment
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+$code.=<<___;
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+_aesp8_cbc_decrypt8x:
|
|
 |
83c29f |
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
|
|
 |
83c29f |
+ li r10,`$FRAME+8*16+15`
|
|
 |
83c29f |
+ li r11,`$FRAME+8*16+31`
|
|
 |
83c29f |
+ stvx v20,r10,$sp # ABI says so
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx v21,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx v22,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx v23,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx v24,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx v25,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx v26,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx v27,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx v28,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx v29,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx v30,r10,$sp
|
|
 |
83c29f |
+ stvx v31,r11,$sp
|
|
 |
83c29f |
+ li r0,-1
|
|
 |
83c29f |
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
|
|
 |
83c29f |
+ li $x10,0x10
|
|
 |
83c29f |
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x20,0x20
|
|
 |
83c29f |
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x30,0x30
|
|
 |
83c29f |
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x40,0x40
|
|
 |
83c29f |
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x50,0x50
|
|
 |
83c29f |
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x60,0x60
|
|
 |
83c29f |
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x70,0x70
|
|
 |
83c29f |
+ mtspr 256,r0
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ subi $rounds,$rounds,3 # -4 in total
|
|
 |
83c29f |
+ subi $len,$len,128 # bias
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx $rndkey0,$x00,$key # load key schedule
|
|
 |
83c29f |
+ lvx v30,$x10,$key
|
|
 |
83c29f |
+ addi $key,$key,0x20
|
|
 |
83c29f |
+ lvx v31,$x00,$key
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
|
|
 |
83c29f |
+ addi $key_,$sp,$FRAME+15
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Load_cbc_dec_key:
|
|
 |
83c29f |
+ ?vperm v24,v30,v31,$keyperm
|
|
 |
83c29f |
+ lvx v30,$x10,$key
|
|
 |
83c29f |
+ addi $key,$key,0x20
|
|
 |
83c29f |
+ stvx v24,$x00,$key_ # off-load round[1]
|
|
 |
83c29f |
+ ?vperm v25,v31,v30,$keyperm
|
|
 |
83c29f |
+ lvx v31,$x00,$key
|
|
 |
83c29f |
+ stvx v25,$x10,$key_ # off-load round[2]
|
|
 |
83c29f |
+ addi $key_,$key_,0x20
|
|
 |
83c29f |
+ bdnz Load_cbc_dec_key
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx v26,$x10,$key
|
|
 |
83c29f |
+ ?vperm v24,v30,v31,$keyperm
|
|
 |
83c29f |
+ lvx v27,$x20,$key
|
|
 |
83c29f |
+ stvx v24,$x00,$key_ # off-load round[3]
|
|
 |
83c29f |
+ ?vperm v25,v31,v26,$keyperm
|
|
 |
83c29f |
+ lvx v28,$x30,$key
|
|
 |
83c29f |
+ stvx v25,$x10,$key_ # off-load round[4]
|
|
 |
83c29f |
+ addi $key_,$sp,$FRAME+15 # rewind $key_
|
|
 |
83c29f |
+ ?vperm v26,v26,v27,$keyperm
|
|
 |
83c29f |
+ lvx v29,$x40,$key
|
|
 |
83c29f |
+ ?vperm v27,v27,v28,$keyperm
|
|
 |
83c29f |
+ lvx v30,$x50,$key
|
|
 |
83c29f |
+ ?vperm v28,v28,v29,$keyperm
|
|
 |
83c29f |
+ lvx v31,$x60,$key
|
|
 |
83c29f |
+ ?vperm v29,v29,v30,$keyperm
|
|
 |
83c29f |
+ lvx $out0,$x70,$key # borrow $out0
|
|
 |
83c29f |
+ ?vperm v30,v30,v31,$keyperm
|
|
 |
83c29f |
+ lvx v24,$x00,$key_ # pre-load round[1]
|
|
 |
83c29f |
+ ?vperm v31,v31,$out0,$keyperm
|
|
 |
83c29f |
+ lvx v25,$x10,$key_ # pre-load round[2]
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ #lvx $inptail,0,$inp # "caller" already did this
|
|
 |
83c29f |
+ #addi $inp,$inp,15 # 15 is not typo
|
|
 |
83c29f |
+ subi $inp,$inp,15 # undo "caller"
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?li $idx,8
|
|
 |
83c29f |
+ lvx_u $in0,$x00,$inp # load first 8 "words"
|
|
 |
83c29f |
+ le?lvsl $inpperm,0,$idx
|
|
 |
83c29f |
+ le?vspltisb $tmp,0x0f
|
|
 |
83c29f |
+ lvx_u $in1,$x10,$inp
|
|
 |
83c29f |
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
|
|
 |
83c29f |
+ lvx_u $in2,$x20,$inp
|
|
 |
83c29f |
+ le?vperm $in0,$in0,$in0,$inpperm
|
|
 |
83c29f |
+ lvx_u $in3,$x30,$inp
|
|
 |
83c29f |
+ le?vperm $in1,$in1,$in1,$inpperm
|
|
 |
83c29f |
+ lvx_u $in4,$x40,$inp
|
|
 |
83c29f |
+ le?vperm $in2,$in2,$in2,$inpperm
|
|
 |
83c29f |
+ vxor $out0,$in0,$rndkey0
|
|
 |
83c29f |
+ lvx_u $in5,$x50,$inp
|
|
 |
83c29f |
+ le?vperm $in3,$in3,$in3,$inpperm
|
|
 |
83c29f |
+ vxor $out1,$in1,$rndkey0
|
|
 |
83c29f |
+ lvx_u $in6,$x60,$inp
|
|
 |
83c29f |
+ le?vperm $in4,$in4,$in4,$inpperm
|
|
 |
83c29f |
+ vxor $out2,$in2,$rndkey0
|
|
 |
83c29f |
+ lvx_u $in7,$x70,$inp
|
|
 |
83c29f |
+ addi $inp,$inp,0x80
|
|
 |
83c29f |
+ le?vperm $in5,$in5,$in5,$inpperm
|
|
 |
83c29f |
+ vxor $out3,$in3,$rndkey0
|
|
 |
83c29f |
+ le?vperm $in6,$in6,$in6,$inpperm
|
|
 |
83c29f |
+ vxor $out4,$in4,$rndkey0
|
|
 |
83c29f |
+ le?vperm $in7,$in7,$in7,$inpperm
|
|
 |
83c29f |
+ vxor $out5,$in5,$rndkey0
|
|
 |
83c29f |
+ vxor $out6,$in6,$rndkey0
|
|
 |
83c29f |
+ vxor $out7,$in7,$rndkey0
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+ b Loop_cbc_dec8x
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Loop_cbc_dec8x:
|
|
 |
83c29f |
+ vncipher $out0,$out0,v24
|
|
 |
83c29f |
+ vncipher $out1,$out1,v24
|
|
 |
83c29f |
+ vncipher $out2,$out2,v24
|
|
 |
83c29f |
+ vncipher $out3,$out3,v24
|
|
 |
83c29f |
+ vncipher $out4,$out4,v24
|
|
 |
83c29f |
+ vncipher $out5,$out5,v24
|
|
 |
83c29f |
+ vncipher $out6,$out6,v24
|
|
 |
83c29f |
+ vncipher $out7,$out7,v24
|
|
 |
83c29f |
+ lvx v24,$x20,$key_ # round[3]
|
|
 |
83c29f |
+ addi $key_,$key_,0x20
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out0,$out0,v25
|
|
 |
83c29f |
+ vncipher $out1,$out1,v25
|
|
 |
83c29f |
+ vncipher $out2,$out2,v25
|
|
 |
83c29f |
+ vncipher $out3,$out3,v25
|
|
 |
83c29f |
+ vncipher $out4,$out4,v25
|
|
 |
83c29f |
+ vncipher $out5,$out5,v25
|
|
 |
83c29f |
+ vncipher $out6,$out6,v25
|
|
 |
83c29f |
+ vncipher $out7,$out7,v25
|
|
 |
83c29f |
+ lvx v25,$x10,$key_ # round[4]
|
|
 |
83c29f |
+ bdnz Loop_cbc_dec8x
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ subic $len,$len,128 # $len-=128
|
|
 |
83c29f |
+ vncipher $out0,$out0,v24
|
|
 |
83c29f |
+ vncipher $out1,$out1,v24
|
|
 |
83c29f |
+ vncipher $out2,$out2,v24
|
|
 |
83c29f |
+ vncipher $out3,$out3,v24
|
|
 |
83c29f |
+ vncipher $out4,$out4,v24
|
|
 |
83c29f |
+ vncipher $out5,$out5,v24
|
|
 |
83c29f |
+ vncipher $out6,$out6,v24
|
|
 |
83c29f |
+ vncipher $out7,$out7,v24
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ subfe. r0,r0,r0 # borrow?-1:0
|
|
 |
83c29f |
+ vncipher $out0,$out0,v25
|
|
 |
83c29f |
+ vncipher $out1,$out1,v25
|
|
 |
83c29f |
+ vncipher $out2,$out2,v25
|
|
 |
83c29f |
+ vncipher $out3,$out3,v25
|
|
 |
83c29f |
+ vncipher $out4,$out4,v25
|
|
 |
83c29f |
+ vncipher $out5,$out5,v25
|
|
 |
83c29f |
+ vncipher $out6,$out6,v25
|
|
 |
83c29f |
+ vncipher $out7,$out7,v25
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ and r0,r0,$len
|
|
 |
83c29f |
+ vncipher $out0,$out0,v26
|
|
 |
83c29f |
+ vncipher $out1,$out1,v26
|
|
 |
83c29f |
+ vncipher $out2,$out2,v26
|
|
 |
83c29f |
+ vncipher $out3,$out3,v26
|
|
 |
83c29f |
+ vncipher $out4,$out4,v26
|
|
 |
83c29f |
+ vncipher $out5,$out5,v26
|
|
 |
83c29f |
+ vncipher $out6,$out6,v26
|
|
 |
83c29f |
+ vncipher $out7,$out7,v26
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ add $inp,$inp,r0 # $inp is adjusted in such
|
|
 |
83c29f |
+ # way that at exit from the
|
|
 |
83c29f |
+ # loop inX-in7 are loaded
|
|
 |
83c29f |
+ # with last "words"
|
|
 |
83c29f |
+ vncipher $out0,$out0,v27
|
|
 |
83c29f |
+ vncipher $out1,$out1,v27
|
|
 |
83c29f |
+ vncipher $out2,$out2,v27
|
|
 |
83c29f |
+ vncipher $out3,$out3,v27
|
|
 |
83c29f |
+ vncipher $out4,$out4,v27
|
|
 |
83c29f |
+ vncipher $out5,$out5,v27
|
|
 |
83c29f |
+ vncipher $out6,$out6,v27
|
|
 |
83c29f |
+ vncipher $out7,$out7,v27
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ addi $key_,$sp,$FRAME+15 # rewind $key_
|
|
 |
83c29f |
+ vncipher $out0,$out0,v28
|
|
 |
83c29f |
+ vncipher $out1,$out1,v28
|
|
 |
83c29f |
+ vncipher $out2,$out2,v28
|
|
 |
83c29f |
+ vncipher $out3,$out3,v28
|
|
 |
83c29f |
+ vncipher $out4,$out4,v28
|
|
 |
83c29f |
+ vncipher $out5,$out5,v28
|
|
 |
83c29f |
+ vncipher $out6,$out6,v28
|
|
 |
83c29f |
+ vncipher $out7,$out7,v28
|
|
 |
83c29f |
+ lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out0,$out0,v29
|
|
 |
83c29f |
+ vncipher $out1,$out1,v29
|
|
 |
83c29f |
+ vncipher $out2,$out2,v29
|
|
 |
83c29f |
+ vncipher $out3,$out3,v29
|
|
 |
83c29f |
+ vncipher $out4,$out4,v29
|
|
 |
83c29f |
+ vncipher $out5,$out5,v29
|
|
 |
83c29f |
+ vncipher $out6,$out6,v29
|
|
 |
83c29f |
+ vncipher $out7,$out7,v29
|
|
 |
83c29f |
+ lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out0,$out0,v30
|
|
 |
83c29f |
+ vxor $ivec,$ivec,v31 # xor with last round key
|
|
 |
83c29f |
+ vncipher $out1,$out1,v30
|
|
 |
83c29f |
+ vxor $in0,$in0,v31
|
|
 |
83c29f |
+ vncipher $out2,$out2,v30
|
|
 |
83c29f |
+ vxor $in1,$in1,v31
|
|
 |
83c29f |
+ vncipher $out3,$out3,v30
|
|
 |
83c29f |
+ vxor $in2,$in2,v31
|
|
 |
83c29f |
+ vncipher $out4,$out4,v30
|
|
 |
83c29f |
+ vxor $in3,$in3,v31
|
|
 |
83c29f |
+ vncipher $out5,$out5,v30
|
|
 |
83c29f |
+ vxor $in4,$in4,v31
|
|
 |
83c29f |
+ vncipher $out6,$out6,v30
|
|
 |
83c29f |
+ vxor $in5,$in5,v31
|
|
 |
83c29f |
+ vncipher $out7,$out7,v30
|
|
 |
83c29f |
+ vxor $in6,$in6,v31
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipherlast $out0,$out0,$ivec
|
|
 |
83c29f |
+ vncipherlast $out1,$out1,$in0
|
|
 |
83c29f |
+ lvx_u $in0,$x00,$inp # load next input block
|
|
 |
83c29f |
+ vncipherlast $out2,$out2,$in1
|
|
 |
83c29f |
+ lvx_u $in1,$x10,$inp
|
|
 |
83c29f |
+ vncipherlast $out3,$out3,$in2
|
|
 |
83c29f |
+ le?vperm $in0,$in0,$in0,$inpperm
|
|
 |
83c29f |
+ lvx_u $in2,$x20,$inp
|
|
 |
83c29f |
+ vncipherlast $out4,$out4,$in3
|
|
 |
83c29f |
+ le?vperm $in1,$in1,$in1,$inpperm
|
|
 |
83c29f |
+ lvx_u $in3,$x30,$inp
|
|
 |
83c29f |
+ vncipherlast $out5,$out5,$in4
|
|
 |
83c29f |
+ le?vperm $in2,$in2,$in2,$inpperm
|
|
 |
83c29f |
+ lvx_u $in4,$x40,$inp
|
|
 |
83c29f |
+ vncipherlast $out6,$out6,$in5
|
|
 |
83c29f |
+ le?vperm $in3,$in3,$in3,$inpperm
|
|
 |
83c29f |
+ lvx_u $in5,$x50,$inp
|
|
 |
83c29f |
+ vncipherlast $out7,$out7,$in6
|
|
 |
83c29f |
+ le?vperm $in4,$in4,$in4,$inpperm
|
|
 |
83c29f |
+ lvx_u $in6,$x60,$inp
|
|
 |
83c29f |
+ vmr $ivec,$in7
|
|
 |
83c29f |
+ le?vperm $in5,$in5,$in5,$inpperm
|
|
 |
83c29f |
+ lvx_u $in7,$x70,$inp
|
|
 |
83c29f |
+ addi $inp,$inp,0x80
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?vperm $out0,$out0,$out0,$inpperm
|
|
 |
83c29f |
+ le?vperm $out1,$out1,$out1,$inpperm
|
|
 |
83c29f |
+ stvx_u $out0,$x00,$out
|
|
 |
83c29f |
+ le?vperm $in6,$in6,$in6,$inpperm
|
|
 |
83c29f |
+ vxor $out0,$in0,$rndkey0
|
|
 |
83c29f |
+ le?vperm $out2,$out2,$out2,$inpperm
|
|
 |
83c29f |
+ stvx_u $out1,$x10,$out
|
|
 |
83c29f |
+ le?vperm $in7,$in7,$in7,$inpperm
|
|
 |
83c29f |
+ vxor $out1,$in1,$rndkey0
|
|
 |
83c29f |
+ le?vperm $out3,$out3,$out3,$inpperm
|
|
 |
83c29f |
+ stvx_u $out2,$x20,$out
|
|
 |
83c29f |
+ vxor $out2,$in2,$rndkey0
|
|
 |
83c29f |
+ le?vperm $out4,$out4,$out4,$inpperm
|
|
 |
83c29f |
+ stvx_u $out3,$x30,$out
|
|
 |
83c29f |
+ vxor $out3,$in3,$rndkey0
|
|
 |
83c29f |
+ le?vperm $out5,$out5,$out5,$inpperm
|
|
 |
83c29f |
+ stvx_u $out4,$x40,$out
|
|
 |
83c29f |
+ vxor $out4,$in4,$rndkey0
|
|
 |
83c29f |
+ le?vperm $out6,$out6,$out6,$inpperm
|
|
 |
83c29f |
+ stvx_u $out5,$x50,$out
|
|
 |
83c29f |
+ vxor $out5,$in5,$rndkey0
|
|
 |
83c29f |
+ le?vperm $out7,$out7,$out7,$inpperm
|
|
 |
83c29f |
+ stvx_u $out6,$x60,$out
|
|
 |
83c29f |
+ vxor $out6,$in6,$rndkey0
|
|
 |
83c29f |
+ stvx_u $out7,$x70,$out
|
|
 |
83c29f |
+ addi $out,$out,0x80
|
|
 |
83c29f |
+ vxor $out7,$in7,$rndkey0
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+ beq Loop_cbc_dec8x # did $len-=128 borrow?
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ addic. $len,$len,128
|
|
 |
83c29f |
+ beq Lcbc_dec8x_done
|
|
 |
83c29f |
+ nop
|
|
 |
83c29f |
+ nop
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Loop_cbc_dec8x_tail: # up to 7 "words" tail...
|
|
 |
83c29f |
+ vncipher $out1,$out1,v24
|
|
 |
83c29f |
+ vncipher $out2,$out2,v24
|
|
 |
83c29f |
+ vncipher $out3,$out3,v24
|
|
 |
83c29f |
+ vncipher $out4,$out4,v24
|
|
 |
83c29f |
+ vncipher $out5,$out5,v24
|
|
 |
83c29f |
+ vncipher $out6,$out6,v24
|
|
 |
83c29f |
+ vncipher $out7,$out7,v24
|
|
 |
83c29f |
+ lvx v24,$x20,$key_ # round[3]
|
|
 |
83c29f |
+ addi $key_,$key_,0x20
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out1,$out1,v25
|
|
 |
83c29f |
+ vncipher $out2,$out2,v25
|
|
 |
83c29f |
+ vncipher $out3,$out3,v25
|
|
 |
83c29f |
+ vncipher $out4,$out4,v25
|
|
 |
83c29f |
+ vncipher $out5,$out5,v25
|
|
 |
83c29f |
+ vncipher $out6,$out6,v25
|
|
 |
83c29f |
+ vncipher $out7,$out7,v25
|
|
 |
83c29f |
+ lvx v25,$x10,$key_ # round[4]
|
|
 |
83c29f |
+ bdnz Loop_cbc_dec8x_tail
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out1,$out1,v24
|
|
 |
83c29f |
+ vncipher $out2,$out2,v24
|
|
 |
83c29f |
+ vncipher $out3,$out3,v24
|
|
 |
83c29f |
+ vncipher $out4,$out4,v24
|
|
 |
83c29f |
+ vncipher $out5,$out5,v24
|
|
 |
83c29f |
+ vncipher $out6,$out6,v24
|
|
 |
83c29f |
+ vncipher $out7,$out7,v24
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out1,$out1,v25
|
|
 |
83c29f |
+ vncipher $out2,$out2,v25
|
|
 |
83c29f |
+ vncipher $out3,$out3,v25
|
|
 |
83c29f |
+ vncipher $out4,$out4,v25
|
|
 |
83c29f |
+ vncipher $out5,$out5,v25
|
|
 |
83c29f |
+ vncipher $out6,$out6,v25
|
|
 |
83c29f |
+ vncipher $out7,$out7,v25
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out1,$out1,v26
|
|
 |
83c29f |
+ vncipher $out2,$out2,v26
|
|
 |
83c29f |
+ vncipher $out3,$out3,v26
|
|
 |
83c29f |
+ vncipher $out4,$out4,v26
|
|
 |
83c29f |
+ vncipher $out5,$out5,v26
|
|
 |
83c29f |
+ vncipher $out6,$out6,v26
|
|
 |
83c29f |
+ vncipher $out7,$out7,v26
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out1,$out1,v27
|
|
 |
83c29f |
+ vncipher $out2,$out2,v27
|
|
 |
83c29f |
+ vncipher $out3,$out3,v27
|
|
 |
83c29f |
+ vncipher $out4,$out4,v27
|
|
 |
83c29f |
+ vncipher $out5,$out5,v27
|
|
 |
83c29f |
+ vncipher $out6,$out6,v27
|
|
 |
83c29f |
+ vncipher $out7,$out7,v27
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out1,$out1,v28
|
|
 |
83c29f |
+ vncipher $out2,$out2,v28
|
|
 |
83c29f |
+ vncipher $out3,$out3,v28
|
|
 |
83c29f |
+ vncipher $out4,$out4,v28
|
|
 |
83c29f |
+ vncipher $out5,$out5,v28
|
|
 |
83c29f |
+ vncipher $out6,$out6,v28
|
|
 |
83c29f |
+ vncipher $out7,$out7,v28
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out1,$out1,v29
|
|
 |
83c29f |
+ vncipher $out2,$out2,v29
|
|
 |
83c29f |
+ vncipher $out3,$out3,v29
|
|
 |
83c29f |
+ vncipher $out4,$out4,v29
|
|
 |
83c29f |
+ vncipher $out5,$out5,v29
|
|
 |
83c29f |
+ vncipher $out6,$out6,v29
|
|
 |
83c29f |
+ vncipher $out7,$out7,v29
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vncipher $out1,$out1,v30
|
|
 |
83c29f |
+ vxor $ivec,$ivec,v31 # last round key
|
|
 |
83c29f |
+ vncipher $out2,$out2,v30
|
|
 |
83c29f |
+ vxor $in1,$in1,v31
|
|
 |
83c29f |
+ vncipher $out3,$out3,v30
|
|
 |
83c29f |
+ vxor $in2,$in2,v31
|
|
 |
83c29f |
+ vncipher $out4,$out4,v30
|
|
 |
83c29f |
+ vxor $in3,$in3,v31
|
|
 |
83c29f |
+ vncipher $out5,$out5,v30
|
|
 |
83c29f |
+ vxor $in4,$in4,v31
|
|
 |
83c29f |
+ vncipher $out6,$out6,v30
|
|
 |
83c29f |
+ vxor $in5,$in5,v31
|
|
 |
83c29f |
+ vncipher $out7,$out7,v30
|
|
 |
83c29f |
+ vxor $in6,$in6,v31
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ cmplwi $len,32 # switch($len)
|
|
 |
83c29f |
+ blt Lcbc_dec8x_one
|
|
 |
83c29f |
+ nop
|
|
 |
83c29f |
+ beq Lcbc_dec8x_two
|
|
 |
83c29f |
+ cmplwi $len,64
|
|
 |
83c29f |
+ blt Lcbc_dec8x_three
|
|
 |
83c29f |
+ nop
|
|
 |
83c29f |
+ beq Lcbc_dec8x_four
|
|
 |
83c29f |
+ cmplwi $len,96
|
|
 |
83c29f |
+ blt Lcbc_dec8x_five
|
|
 |
83c29f |
+ nop
|
|
 |
83c29f |
+ beq Lcbc_dec8x_six
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Lcbc_dec8x_seven:
|
|
 |
83c29f |
+ vncipherlast $out1,$out1,$ivec
|
|
 |
83c29f |
+ vncipherlast $out2,$out2,$in1
|
|
 |
83c29f |
+ vncipherlast $out3,$out3,$in2
|
|
 |
83c29f |
+ vncipherlast $out4,$out4,$in3
|
|
 |
83c29f |
+ vncipherlast $out5,$out5,$in4
|
|
 |
83c29f |
+ vncipherlast $out6,$out6,$in5
|
|
 |
83c29f |
+ vncipherlast $out7,$out7,$in6
|
|
 |
83c29f |
+ vmr $ivec,$in7
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?vperm $out1,$out1,$out1,$inpperm
|
|
 |
83c29f |
+ le?vperm $out2,$out2,$out2,$inpperm
|
|
 |
83c29f |
+ stvx_u $out1,$x00,$out
|
|
 |
83c29f |
+ le?vperm $out3,$out3,$out3,$inpperm
|
|
 |
83c29f |
+ stvx_u $out2,$x10,$out
|
|
 |
83c29f |
+ le?vperm $out4,$out4,$out4,$inpperm
|
|
 |
83c29f |
+ stvx_u $out3,$x20,$out
|
|
 |
83c29f |
+ le?vperm $out5,$out5,$out5,$inpperm
|
|
 |
83c29f |
+ stvx_u $out4,$x30,$out
|
|
 |
83c29f |
+ le?vperm $out6,$out6,$out6,$inpperm
|
|
 |
83c29f |
+ stvx_u $out5,$x40,$out
|
|
 |
83c29f |
+ le?vperm $out7,$out7,$out7,$inpperm
|
|
 |
83c29f |
+ stvx_u $out6,$x50,$out
|
|
 |
83c29f |
+ stvx_u $out7,$x60,$out
|
|
 |
83c29f |
+ addi $out,$out,0x70
|
|
 |
83c29f |
+ b Lcbc_dec8x_done
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Lcbc_dec8x_six:
|
|
 |
83c29f |
+ vncipherlast $out2,$out2,$ivec
|
|
 |
83c29f |
+ vncipherlast $out3,$out3,$in2
|
|
 |
83c29f |
+ vncipherlast $out4,$out4,$in3
|
|
 |
83c29f |
+ vncipherlast $out5,$out5,$in4
|
|
 |
83c29f |
+ vncipherlast $out6,$out6,$in5
|
|
 |
83c29f |
+ vncipherlast $out7,$out7,$in6
|
|
 |
83c29f |
+ vmr $ivec,$in7
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?vperm $out2,$out2,$out2,$inpperm
|
|
 |
83c29f |
+ le?vperm $out3,$out3,$out3,$inpperm
|
|
 |
83c29f |
+ stvx_u $out2,$x00,$out
|
|
 |
83c29f |
+ le?vperm $out4,$out4,$out4,$inpperm
|
|
 |
83c29f |
+ stvx_u $out3,$x10,$out
|
|
 |
83c29f |
+ le?vperm $out5,$out5,$out5,$inpperm
|
|
 |
83c29f |
+ stvx_u $out4,$x20,$out
|
|
 |
83c29f |
+ le?vperm $out6,$out6,$out6,$inpperm
|
|
 |
83c29f |
+ stvx_u $out5,$x30,$out
|
|
 |
83c29f |
+ le?vperm $out7,$out7,$out7,$inpperm
|
|
 |
83c29f |
+ stvx_u $out6,$x40,$out
|
|
 |
83c29f |
+ stvx_u $out7,$x50,$out
|
|
 |
83c29f |
+ addi $out,$out,0x60
|
|
 |
83c29f |
+ b Lcbc_dec8x_done
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Lcbc_dec8x_five:
|
|
 |
83c29f |
+ vncipherlast $out3,$out3,$ivec
|
|
 |
83c29f |
+ vncipherlast $out4,$out4,$in3
|
|
 |
83c29f |
+ vncipherlast $out5,$out5,$in4
|
|
 |
83c29f |
+ vncipherlast $out6,$out6,$in5
|
|
 |
83c29f |
+ vncipherlast $out7,$out7,$in6
|
|
 |
83c29f |
+ vmr $ivec,$in7
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?vperm $out3,$out3,$out3,$inpperm
|
|
 |
83c29f |
+ le?vperm $out4,$out4,$out4,$inpperm
|
|
 |
83c29f |
+ stvx_u $out3,$x00,$out
|
|
 |
83c29f |
+ le?vperm $out5,$out5,$out5,$inpperm
|
|
 |
83c29f |
+ stvx_u $out4,$x10,$out
|
|
 |
83c29f |
+ le?vperm $out6,$out6,$out6,$inpperm
|
|
 |
83c29f |
+ stvx_u $out5,$x20,$out
|
|
 |
83c29f |
+ le?vperm $out7,$out7,$out7,$inpperm
|
|
 |
83c29f |
+ stvx_u $out6,$x30,$out
|
|
 |
83c29f |
+ stvx_u $out7,$x40,$out
|
|
 |
83c29f |
+ addi $out,$out,0x50
|
|
 |
83c29f |
+ b Lcbc_dec8x_done
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Lcbc_dec8x_four:
|
|
 |
83c29f |
+ vncipherlast $out4,$out4,$ivec
|
|
 |
83c29f |
+ vncipherlast $out5,$out5,$in4
|
|
 |
83c29f |
+ vncipherlast $out6,$out6,$in5
|
|
 |
83c29f |
+ vncipherlast $out7,$out7,$in6
|
|
 |
83c29f |
+ vmr $ivec,$in7
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?vperm $out4,$out4,$out4,$inpperm
|
|
 |
83c29f |
+ le?vperm $out5,$out5,$out5,$inpperm
|
|
 |
83c29f |
+ stvx_u $out4,$x00,$out
|
|
 |
83c29f |
+ le?vperm $out6,$out6,$out6,$inpperm
|
|
 |
83c29f |
+ stvx_u $out5,$x10,$out
|
|
 |
83c29f |
+ le?vperm $out7,$out7,$out7,$inpperm
|
|
 |
83c29f |
+ stvx_u $out6,$x20,$out
|
|
 |
83c29f |
+ stvx_u $out7,$x30,$out
|
|
 |
83c29f |
+ addi $out,$out,0x40
|
|
 |
83c29f |
+ b Lcbc_dec8x_done
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Lcbc_dec8x_three:
|
|
 |
83c29f |
+ vncipherlast $out5,$out5,$ivec
|
|
 |
83c29f |
+ vncipherlast $out6,$out6,$in5
|
|
 |
83c29f |
+ vncipherlast $out7,$out7,$in6
|
|
 |
83c29f |
+ vmr $ivec,$in7
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?vperm $out5,$out5,$out5,$inpperm
|
|
 |
83c29f |
+ le?vperm $out6,$out6,$out6,$inpperm
|
|
 |
83c29f |
+ stvx_u $out5,$x00,$out
|
|
 |
83c29f |
+ le?vperm $out7,$out7,$out7,$inpperm
|
|
 |
83c29f |
+ stvx_u $out6,$x10,$out
|
|
 |
83c29f |
+ stvx_u $out7,$x20,$out
|
|
 |
83c29f |
+ addi $out,$out,0x30
|
|
 |
83c29f |
+ b Lcbc_dec8x_done
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Lcbc_dec8x_two:
|
|
 |
83c29f |
+ vncipherlast $out6,$out6,$ivec
|
|
 |
83c29f |
+ vncipherlast $out7,$out7,$in6
|
|
 |
83c29f |
+ vmr $ivec,$in7
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?vperm $out6,$out6,$out6,$inpperm
|
|
 |
83c29f |
+ le?vperm $out7,$out7,$out7,$inpperm
|
|
 |
83c29f |
+ stvx_u $out6,$x00,$out
|
|
 |
83c29f |
+ stvx_u $out7,$x10,$out
|
|
 |
83c29f |
+ addi $out,$out,0x20
|
|
 |
83c29f |
+ b Lcbc_dec8x_done
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Lcbc_dec8x_one:
|
|
 |
83c29f |
+ vncipherlast $out7,$out7,$ivec
|
|
 |
83c29f |
+ vmr $ivec,$in7
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?vperm $out7,$out7,$out7,$inpperm
|
|
 |
83c29f |
+ stvx_u $out7,0,$out
|
|
 |
83c29f |
+ addi $out,$out,0x10
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Lcbc_dec8x_done:
|
|
 |
83c29f |
+ le?vperm $ivec,$ivec,$ivec,$inpperm
|
|
 |
83c29f |
+ stvx_u $ivec,0,$ivp # write [unaligned] iv
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ li r10,`$FRAME+15`
|
|
 |
83c29f |
+ li r11,`$FRAME+31`
|
|
 |
83c29f |
+ stvx $inpperm,r10,$sp # wipe copies of round keys
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx $inpperm,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx $inpperm,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx $inpperm,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx $inpperm,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx $inpperm,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx $inpperm,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx $inpperm,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ mtspr 256,$vrsave
|
|
 |
83c29f |
+ lvx v20,r10,$sp # ABI says so
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ lvx v21,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ lvx v22,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ lvx v23,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ lvx v24,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ lvx v25,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ lvx v26,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ lvx v27,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ lvx v28,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ lvx v29,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ lvx v30,r10,$sp
|
|
 |
83c29f |
+ lvx v31,r11,$sp
|
|
 |
83c29f |
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
 |
83c29f |
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
 |
83c29f |
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
 |
83c29f |
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
 |
83c29f |
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
 |
83c29f |
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
 |
83c29f |
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
|
|
 |
83c29f |
+ blr
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+ .byte 0,12,0x14,0,0x80,6,6,0
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+}} }}}
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+#########################################################################
|
|
 |
83c29f |
+{{{ # CTR procedure[s] #
|
|
 |
83c29f |
+my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); # GPRs r3..r10, in this order
|
|
 |
83c29f |
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); # vector registers v0..v3
|
|
 |
83c29f |
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
|
|
 |
83c29f |
+ map("v$_",(4..11)); # vector registers v4..v11
|
|
 |
83c29f |
+my $dat=$tmp; # $dat is an alias for $tmp (v3)
|
|
 |
83c29f |
+# CTR entry point: under 8 blocks are processed one at a time here, 8 or more branch to _aesp8_ctr32_encrypt8x
|
|
 |
83c29f |
+$code.=<<___;
|
|
 |
83c29f |
+.globl .${prefix}_ctr32_encrypt_blocks
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+.${prefix}_ctr32_encrypt_blocks:
|
|
 |
83c29f |
+ ${UCMP}i $len,1
|
|
 |
83c29f |
+ bltlr- # return right away when len is below 1
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lis r0,0xfff0 # upper halfword 0xfff0
|
|
 |
83c29f |
+ mfspr $vrsave,256 # keep the incoming SPR 256 value
|
|
 |
83c29f |
+ mtspr 256,r0 # SPR 256 is VRSAVE: top 12 bits set, covering v0-v11
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ li $idx,15
|
|
 |
83c29f |
+ vxor $rndkey0,$rndkey0,$rndkey0 # all-zero vector
|
|
 |
83c29f |
+ le?vspltisb $tmp,0x0f # 0x0f in every byte
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx $ivec,0,$ivp # load [unaligned] iv
|
|
 |
83c29f |
+ lvsl $inpperm,0,$ivp # permute control from iv alignment
|
|
 |
83c29f |
+ lvx $inptail,$idx,$ivp # quadword containing iv byte 15
|
|
 |
83c29f |
+ vspltisb $one,1 # 0x01 in every byte
|
|
 |
83c29f |
+ le?vxor $inpperm,$inpperm,$tmp
|
|
 |
83c29f |
+ vperm $ivec,$ivec,$inptail,$inpperm # gather the 16 iv bytes
|
|
 |
83c29f |
+ vsldoi $one,$rndkey0,$one,1 # 15 zero bytes followed by 0x01
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ neg r11,$inp
|
|
 |
83c29f |
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
|
|
 |
83c29f |
+ lwz $rounds,240($key) # 32-bit round count at key+240
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvsr $inpperm,0,r11 # prepare for unaligned load
|
|
 |
83c29f |
+ lvx $inptail,0,$inp # first aligned input quadword
|
|
 |
83c29f |
+ addi $inp,$inp,15 # 15 is not typo
|
|
 |
83c29f |
+ le?vxor $inpperm,$inpperm,$tmp
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ srwi $rounds,$rounds,1 # rounds/2 ...
|
|
 |
83c29f |
+ li $idx,16
|
|
 |
83c29f |
+ subi $rounds,$rounds,1 # ... minus 1: loop trip count, two rounds per trip
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ ${UCMP}i $len,8
|
|
 |
83c29f |
+ bge _aesp8_ctr32_encrypt8x # 8 or more blocks: take the 8x path
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ ?lvsr $outperm,0,$out # prepare for unaligned store
|
|
 |
83c29f |
+ vspltisb $outmask,-1 # all-ones vector
|
|
 |
83c29f |
+ lvx $outhead,0,$out # existing bytes of first output quadword
|
|
 |
83c29f |
+ ?vperm $outmask,$rndkey0,$outmask,$outperm # byte-select mask made of zeros and ones
|
|
 |
83c29f |
+ le?vxor $outperm,$outperm,$tmp
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx $rndkey0,0,$key
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+ lvx $rndkey1,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm # realigned round key 0
|
|
 |
83c29f |
+ vxor $inout,$ivec,$rndkey0 # counter block xor round key 0
|
|
 |
83c29f |
+ lvx $rndkey0,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ b Loop_ctr32_enc
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Loop_ctr32_enc: # two AES rounds per pass
|
|
 |
83c29f |
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
 |
83c29f |
+ vcipher $inout,$inout,$rndkey1
|
|
 |
83c29f |
+ lvx $rndkey1,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
 |
83c29f |
+ vcipher $inout,$inout,$rndkey0
|
|
 |
83c29f |
+ lvx $rndkey0,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ bdnz Loop_ctr32_enc
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vadduwm $ivec,$ivec,$one # counter++ (per-word add, only the last word changes)
|
|
 |
83c29f |
+ vmr $dat,$inptail
|
|
 |
83c29f |
+ lvx $inptail,0,$inp # next aligned input quadword
|
|
 |
83c29f |
+ addi $inp,$inp,16
|
|
 |
83c29f |
+ subic. $len,$len,1 # blocks--
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
 |
83c29f |
+ vcipher $inout,$inout,$rndkey1 # last vcipher round before vcipherlast
|
|
 |
83c29f |
+ lvx $rndkey1,$idx,$key
|
|
 |
83c29f |
+ vperm $dat,$dat,$inptail,$inpperm # realigned input block
|
|
 |
83c29f |
+ li $idx,16 # rewind key offset for the next block
|
|
 |
83c29f |
+ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
|
|
 |
83c29f |
+ lvx $rndkey0,0,$key # start reloading round key 0
|
|
 |
83c29f |
+ vxor $dat,$dat,$rndkey1 # last round key
|
|
 |
83c29f |
+ vcipherlast $inout,$inout,$dat # final round, folds in input xor last key
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx $rndkey1,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ vperm $inout,$inout,$inout,$outperm # rotate result to the output alignment
|
|
 |
83c29f |
+ vsel $dat,$outhead,$inout,$outmask # merge with bytes carried in outhead
|
|
 |
83c29f |
+ mtctr $rounds # re-arm loop counter
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
 |
83c29f |
+ vmr $outhead,$inout # carry this result into the next store
|
|
 |
83c29f |
+ vxor $inout,$ivec,$rndkey0 # next counter block xor round key 0
|
|
 |
83c29f |
+ lvx $rndkey0,$idx,$key
|
|
 |
83c29f |
+ addi $idx,$idx,16
|
|
 |
83c29f |
+ stvx $dat,0,$out
|
|
 |
83c29f |
+ addi $out,$out,16
|
|
 |
83c29f |
+ bne Loop_ctr32_enc # CR0 from subic. above: loop while blocks remain
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ addi $out,$out,-1 # address of the last output byte
|
|
 |
83c29f |
+ lvx $inout,0,$out # redundant in aligned case
|
|
 |
83c29f |
+ vsel $inout,$outhead,$inout,$outmask # merge carried bytes with what is already in memory
|
|
 |
83c29f |
+ stvx $inout,0,$out
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ mtspr 256,$vrsave # restore SPR 256
|
|
 |
83c29f |
+ blr
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+ .byte 0,12,0x14,0,0,0,6,0
|
|
 |
83c29f |
+ .long 0
|
|
 |
83c29f |
+___
|
|
 |
83c29f |
+#########################################################################
|
|
 |
83c29f |
+{{ # Optimized CTR procedure #
|
|
 |
83c29f |
+my $key_="r11";
|
|
 |
83c29f |
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
|
|
 |
83c29f |
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
|
|
 |
83c29f |
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
|
|
 |
83c29f |
+my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
|
|
 |
83c29f |
+ # v26-v31 last 6 round keys
|
|
 |
83c29f |
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
|
|
 |
83c29f |
+my ($two,$three,$four)=($outhead,$outperm,$outmask);
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+$code.=<<___;
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+_aesp8_ctr32_encrypt8x:
|
|
 |
83c29f |
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
|
|
 |
83c29f |
+ li r10,`$FRAME+8*16+15`
|
|
 |
83c29f |
+ li r11,`$FRAME+8*16+31`
|
|
 |
83c29f |
+ stvx v20,r10,$sp # ABI says so
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx v21,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx v22,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx v23,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx v24,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx v25,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx v26,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx v27,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx v28,r10,$sp
|
|
 |
83c29f |
+ addi r10,r10,32
|
|
 |
83c29f |
+ stvx v29,r11,$sp
|
|
 |
83c29f |
+ addi r11,r11,32
|
|
 |
83c29f |
+ stvx v30,r10,$sp
|
|
 |
83c29f |
+ stvx v31,r11,$sp
|
|
 |
83c29f |
+ li r0,-1
|
|
 |
83c29f |
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
|
|
 |
83c29f |
+ li $x10,0x10
|
|
 |
83c29f |
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x20,0x20
|
|
 |
83c29f |
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x30,0x30
|
|
 |
83c29f |
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x40,0x40
|
|
 |
83c29f |
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x50,0x50
|
|
 |
83c29f |
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x60,0x60
|
|
 |
83c29f |
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
 |
83c29f |
+ li $x70,0x70
|
|
 |
83c29f |
+ mtspr 256,r0
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ subi $rounds,$rounds,3 # -4 in total
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx $rndkey0,$x00,$key # load key schedule
|
|
 |
83c29f |
+ lvx v30,$x10,$key
|
|
 |
83c29f |
+ addi $key,$key,0x20
|
|
 |
83c29f |
+ lvx v31,$x00,$key
|
|
 |
83c29f |
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
|
|
 |
83c29f |
+ addi $key_,$sp,$FRAME+15
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Load_ctr32_enc_key:
|
|
 |
83c29f |
+ ?vperm v24,v30,v31,$keyperm
|
|
 |
83c29f |
+ lvx v30,$x10,$key
|
|
 |
83c29f |
+ addi $key,$key,0x20
|
|
 |
83c29f |
+ stvx v24,$x00,$key_ # off-load round[1]
|
|
 |
83c29f |
+ ?vperm v25,v31,v30,$keyperm
|
|
 |
83c29f |
+ lvx v31,$x00,$key
|
|
 |
83c29f |
+ stvx v25,$x10,$key_ # off-load round[2]
|
|
 |
83c29f |
+ addi $key_,$key_,0x20
|
|
 |
83c29f |
+ bdnz Load_ctr32_enc_key
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ lvx v26,$x10,$key
|
|
 |
83c29f |
+ ?vperm v24,v30,v31,$keyperm
|
|
 |
83c29f |
+ lvx v27,$x20,$key
|
|
 |
83c29f |
+ stvx v24,$x00,$key_ # off-load round[3]
|
|
 |
83c29f |
+ ?vperm v25,v31,v26,$keyperm
|
|
 |
83c29f |
+ lvx v28,$x30,$key
|
|
 |
83c29f |
+ stvx v25,$x10,$key_ # off-load round[4]
|
|
 |
83c29f |
+ addi $key_,$sp,$FRAME+15 # rewind $key_
|
|
 |
83c29f |
+ ?vperm v26,v26,v27,$keyperm
|
|
 |
83c29f |
+ lvx v29,$x40,$key
|
|
 |
83c29f |
+ ?vperm v27,v27,v28,$keyperm
|
|
 |
83c29f |
+ lvx v30,$x50,$key
|
|
 |
83c29f |
+ ?vperm v28,v28,v29,$keyperm
|
|
 |
83c29f |
+ lvx v31,$x60,$key
|
|
 |
83c29f |
+ ?vperm v29,v29,v30,$keyperm
|
|
 |
83c29f |
+ lvx $out0,$x70,$key # borrow $out0
|
|
 |
83c29f |
+ ?vperm v30,v30,v31,$keyperm
|
|
 |
83c29f |
+ lvx v24,$x00,$key_ # pre-load round[1]
|
|
 |
83c29f |
+ ?vperm v31,v31,$out0,$keyperm
|
|
 |
83c29f |
+ lvx v25,$x10,$key_ # pre-load round[2]
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vadduwm $two,$one,$one
|
|
 |
83c29f |
+ subi $inp,$inp,15 # undo "caller"
|
|
 |
83c29f |
+ $SHL $len,$len,4
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vadduwm $out1,$ivec,$one # counter values ...
|
|
 |
83c29f |
+ vadduwm $out2,$ivec,$two
|
|
 |
83c29f |
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
|
|
 |
83c29f |
+ le?li $idx,8
|
|
 |
83c29f |
+ vadduwm $out3,$out1,$two
|
|
 |
83c29f |
+ vxor $out1,$out1,$rndkey0
|
|
 |
83c29f |
+ le?lvsl $inpperm,0,$idx
|
|
 |
83c29f |
+ vadduwm $out4,$out2,$two
|
|
 |
83c29f |
+ vxor $out2,$out2,$rndkey0
|
|
 |
83c29f |
+ le?vspltisb $tmp,0x0f
|
|
 |
83c29f |
+ vadduwm $out5,$out3,$two
|
|
 |
83c29f |
+ vxor $out3,$out3,$rndkey0
|
|
 |
83c29f |
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
|
|
 |
83c29f |
+ vadduwm $out6,$out4,$two
|
|
 |
83c29f |
+ vxor $out4,$out4,$rndkey0
|
|
 |
83c29f |
+ vadduwm $out7,$out5,$two
|
|
 |
83c29f |
+ vxor $out5,$out5,$rndkey0
|
|
 |
83c29f |
+ vadduwm $ivec,$out6,$two # next counter value
|
|
 |
83c29f |
+ vxor $out6,$out6,$rndkey0
|
|
 |
83c29f |
+ vxor $out7,$out7,$rndkey0
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+ b Loop_ctr32_enc8x
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Loop_ctr32_enc8x:
|
|
 |
83c29f |
+ vcipher $out0,$out0,v24
|
|
 |
83c29f |
+ vcipher $out1,$out1,v24
|
|
 |
83c29f |
+ vcipher $out2,$out2,v24
|
|
 |
83c29f |
+ vcipher $out3,$out3,v24
|
|
 |
83c29f |
+ vcipher $out4,$out4,v24
|
|
 |
83c29f |
+ vcipher $out5,$out5,v24
|
|
 |
83c29f |
+ vcipher $out6,$out6,v24
|
|
 |
83c29f |
+ vcipher $out7,$out7,v24
|
|
 |
83c29f |
+Loop_ctr32_enc8x_middle:
|
|
 |
83c29f |
+ lvx v24,$x20,$key_ # round[3]
|
|
 |
83c29f |
+ addi $key_,$key_,0x20
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vcipher $out0,$out0,v25
|
|
 |
83c29f |
+ vcipher $out1,$out1,v25
|
|
 |
83c29f |
+ vcipher $out2,$out2,v25
|
|
 |
83c29f |
+ vcipher $out3,$out3,v25
|
|
 |
83c29f |
+ vcipher $out4,$out4,v25
|
|
 |
83c29f |
+ vcipher $out5,$out5,v25
|
|
 |
83c29f |
+ vcipher $out6,$out6,v25
|
|
 |
83c29f |
+ vcipher $out7,$out7,v25
|
|
 |
83c29f |
+ lvx v25,$x10,$key_ # round[4]
|
|
 |
83c29f |
+ bdnz Loop_ctr32_enc8x
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ subic r11,$len,256 # $len-256, borrow $key_
|
|
 |
83c29f |
+ vcipher $out0,$out0,v24
|
|
 |
83c29f |
+ vcipher $out1,$out1,v24
|
|
 |
83c29f |
+ vcipher $out2,$out2,v24
|
|
 |
83c29f |
+ vcipher $out3,$out3,v24
|
|
 |
83c29f |
+ vcipher $out4,$out4,v24
|
|
 |
83c29f |
+ vcipher $out5,$out5,v24
|
|
 |
83c29f |
+ vcipher $out6,$out6,v24
|
|
 |
83c29f |
+ vcipher $out7,$out7,v24
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ subfe r0,r0,r0 # borrow?-1:0
|
|
 |
83c29f |
+ vcipher $out0,$out0,v25
|
|
 |
83c29f |
+ vcipher $out1,$out1,v25
|
|
 |
83c29f |
+ vcipher $out2,$out2,v25
|
|
 |
83c29f |
+ vcipher $out3,$out3,v25
|
|
 |
83c29f |
+ vcipher $out4,$out4,v25
|
|
 |
83c29f |
+ vcipher $out5,$out5,v25
|
|
 |
83c29f |
+ vcipher $out6,$out6,v25
|
|
 |
83c29f |
+ vcipher $out7,$out7,v25
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ and r0,r0,r11
|
|
 |
83c29f |
+ addi $key_,$sp,$FRAME+15 # rewind $key_
|
|
 |
83c29f |
+ vcipher $out0,$out0,v26
|
|
 |
83c29f |
+ vcipher $out1,$out1,v26
|
|
 |
83c29f |
+ vcipher $out2,$out2,v26
|
|
 |
83c29f |
+ vcipher $out3,$out3,v26
|
|
 |
83c29f |
+ vcipher $out4,$out4,v26
|
|
 |
83c29f |
+ vcipher $out5,$out5,v26
|
|
 |
83c29f |
+ vcipher $out6,$out6,v26
|
|
 |
83c29f |
+ vcipher $out7,$out7,v26
|
|
 |
83c29f |
+ lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ subic $len,$len,129 # $len-=129
|
|
 |
83c29f |
+ vcipher $out0,$out0,v27
|
|
 |
83c29f |
+ addi $len,$len,1 # $len-=128 really
|
|
 |
83c29f |
+ vcipher $out1,$out1,v27
|
|
 |
83c29f |
+ vcipher $out2,$out2,v27
|
|
 |
83c29f |
+ vcipher $out3,$out3,v27
|
|
 |
83c29f |
+ vcipher $out4,$out4,v27
|
|
 |
83c29f |
+ vcipher $out5,$out5,v27
|
|
 |
83c29f |
+ vcipher $out6,$out6,v27
|
|
 |
83c29f |
+ vcipher $out7,$out7,v27
|
|
 |
83c29f |
+ lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vcipher $out0,$out0,v28
|
|
 |
83c29f |
+ lvx_u $in0,$x00,$inp # load input
|
|
 |
83c29f |
+ vcipher $out1,$out1,v28
|
|
 |
83c29f |
+ lvx_u $in1,$x10,$inp
|
|
 |
83c29f |
+ vcipher $out2,$out2,v28
|
|
 |
83c29f |
+ lvx_u $in2,$x20,$inp
|
|
 |
83c29f |
+ vcipher $out3,$out3,v28
|
|
 |
83c29f |
+ lvx_u $in3,$x30,$inp
|
|
 |
83c29f |
+ vcipher $out4,$out4,v28
|
|
 |
83c29f |
+ lvx_u $in4,$x40,$inp
|
|
 |
83c29f |
+ vcipher $out5,$out5,v28
|
|
 |
83c29f |
+ lvx_u $in5,$x50,$inp
|
|
 |
83c29f |
+ vcipher $out6,$out6,v28
|
|
 |
83c29f |
+ lvx_u $in6,$x60,$inp
|
|
 |
83c29f |
+ vcipher $out7,$out7,v28
|
|
 |
83c29f |
+ lvx_u $in7,$x70,$inp
|
|
 |
83c29f |
+ addi $inp,$inp,0x80
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vcipher $out0,$out0,v29
|
|
 |
83c29f |
+ le?vperm $in0,$in0,$in0,$inpperm
|
|
 |
83c29f |
+ vcipher $out1,$out1,v29
|
|
 |
83c29f |
+ le?vperm $in1,$in1,$in1,$inpperm
|
|
 |
83c29f |
+ vcipher $out2,$out2,v29
|
|
 |
83c29f |
+ le?vperm $in2,$in2,$in2,$inpperm
|
|
 |
83c29f |
+ vcipher $out3,$out3,v29
|
|
 |
83c29f |
+ le?vperm $in3,$in3,$in3,$inpperm
|
|
 |
83c29f |
+ vcipher $out4,$out4,v29
|
|
 |
83c29f |
+ le?vperm $in4,$in4,$in4,$inpperm
|
|
 |
83c29f |
+ vcipher $out5,$out5,v29
|
|
 |
83c29f |
+ le?vperm $in5,$in5,$in5,$inpperm
|
|
 |
83c29f |
+ vcipher $out6,$out6,v29
|
|
 |
83c29f |
+ le?vperm $in6,$in6,$in6,$inpperm
|
|
 |
83c29f |
+ vcipher $out7,$out7,v29
|
|
 |
83c29f |
+ le?vperm $in7,$in7,$in7,$inpperm
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ add $inp,$inp,r0 # $inp is adjusted in such
|
|
 |
83c29f |
+ # way that at exit from the
|
|
 |
83c29f |
+ # loop inX-in7 are loaded
|
|
 |
83c29f |
+ # with last "words"
|
|
 |
83c29f |
+ subfe. r0,r0,r0 # borrow?-1:0
|
|
 |
83c29f |
+ vcipher $out0,$out0,v30
|
|
 |
83c29f |
+ vxor $in0,$in0,v31 # xor with last round key
|
|
 |
83c29f |
+ vcipher $out1,$out1,v30
|
|
 |
83c29f |
+ vxor $in1,$in1,v31
|
|
 |
83c29f |
+ vcipher $out2,$out2,v30
|
|
 |
83c29f |
+ vxor $in2,$in2,v31
|
|
 |
83c29f |
+ vcipher $out3,$out3,v30
|
|
 |
83c29f |
+ vxor $in3,$in3,v31
|
|
 |
83c29f |
+ vcipher $out4,$out4,v30
|
|
 |
83c29f |
+ vxor $in4,$in4,v31
|
|
 |
83c29f |
+ vcipher $out5,$out5,v30
|
|
 |
83c29f |
+ vxor $in5,$in5,v31
|
|
 |
83c29f |
+ vcipher $out6,$out6,v30
|
|
 |
83c29f |
+ vxor $in6,$in6,v31
|
|
 |
83c29f |
+ vcipher $out7,$out7,v30
|
|
 |
83c29f |
+ vxor $in7,$in7,v31
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ bne Lctr32_enc8x_break # did $len-129 borrow?
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vcipherlast $in0,$out0,$in0
|
|
 |
83c29f |
+ vcipherlast $in1,$out1,$in1
|
|
 |
83c29f |
+ vadduwm $out1,$ivec,$one # counter values ...
|
|
 |
83c29f |
+ vcipherlast $in2,$out2,$in2
|
|
 |
83c29f |
+ vadduwm $out2,$ivec,$two
|
|
 |
83c29f |
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
|
|
 |
83c29f |
+ vcipherlast $in3,$out3,$in3
|
|
 |
83c29f |
+ vadduwm $out3,$out1,$two
|
|
 |
83c29f |
+ vxor $out1,$out1,$rndkey0
|
|
 |
83c29f |
+ vcipherlast $in4,$out4,$in4
|
|
 |
83c29f |
+ vadduwm $out4,$out2,$two
|
|
 |
83c29f |
+ vxor $out2,$out2,$rndkey0
|
|
 |
83c29f |
+ vcipherlast $in5,$out5,$in5
|
|
 |
83c29f |
+ vadduwm $out5,$out3,$two
|
|
 |
83c29f |
+ vxor $out3,$out3,$rndkey0
|
|
 |
83c29f |
+ vcipherlast $in6,$out6,$in6
|
|
 |
83c29f |
+ vadduwm $out6,$out4,$two
|
|
 |
83c29f |
+ vxor $out4,$out4,$rndkey0
|
|
 |
83c29f |
+ vcipherlast $in7,$out7,$in7
|
|
 |
83c29f |
+ vadduwm $out7,$out5,$two
|
|
 |
83c29f |
+ vxor $out5,$out5,$rndkey0
|
|
 |
83c29f |
+ le?vperm $in0,$in0,$in0,$inpperm
|
|
 |
83c29f |
+ vadduwm $ivec,$out6,$two # next counter value
|
|
 |
83c29f |
+ vxor $out6,$out6,$rndkey0
|
|
 |
83c29f |
+ le?vperm $in1,$in1,$in1,$inpperm
|
|
 |
83c29f |
+ vxor $out7,$out7,$rndkey0
|
|
 |
83c29f |
+ mtctr $rounds
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ vcipher $out0,$out0,v24
|
|
 |
83c29f |
+ stvx_u $in0,$x00,$out
|
|
 |
83c29f |
+ le?vperm $in2,$in2,$in2,$inpperm
|
|
 |
83c29f |
+ vcipher $out1,$out1,v24
|
|
 |
83c29f |
+ stvx_u $in1,$x10,$out
|
|
 |
83c29f |
+ le?vperm $in3,$in3,$in3,$inpperm
|
|
 |
83c29f |
+ vcipher $out2,$out2,v24
|
|
 |
83c29f |
+ stvx_u $in2,$x20,$out
|
|
 |
83c29f |
+ le?vperm $in4,$in4,$in4,$inpperm
|
|
 |
83c29f |
+ vcipher $out3,$out3,v24
|
|
 |
83c29f |
+ stvx_u $in3,$x30,$out
|
|
 |
83c29f |
+ le?vperm $in5,$in5,$in5,$inpperm
|
|
 |
83c29f |
+ vcipher $out4,$out4,v24
|
|
 |
83c29f |
+ stvx_u $in4,$x40,$out
|
|
 |
83c29f |
+ le?vperm $in6,$in6,$in6,$inpperm
|
|
 |
83c29f |
+ vcipher $out5,$out5,v24
|
|
 |
83c29f |
+ stvx_u $in5,$x50,$out
|
|
 |
83c29f |
+ le?vperm $in7,$in7,$in7,$inpperm
|
|
 |
83c29f |
+ vcipher $out6,$out6,v24
|
|
 |
83c29f |
+ stvx_u $in6,$x60,$out
|
|
 |
83c29f |
+ vcipher $out7,$out7,v24
|
|
 |
83c29f |
+ stvx_u $in7,$x70,$out
|
|
 |
83c29f |
+ addi $out,$out,0x80
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ b Loop_ctr32_enc8x_middle
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Lctr32_enc8x_break:
|
|
 |
83c29f |
+ cmpwi $len,-0x60
|
|
 |
83c29f |
+ blt Lctr32_enc8x_one
|
|
 |
83c29f |
+ nop
|
|
 |
83c29f |
+ beq Lctr32_enc8x_two
|
|
 |
83c29f |
+ cmpwi $len,-0x40
|
|
 |
83c29f |
+ blt Lctr32_enc8x_three
|
|
 |
83c29f |
+ nop
|
|
 |
83c29f |
+ beq Lctr32_enc8x_four
|
|
 |
83c29f |
+ cmpwi $len,-0x20
|
|
 |
83c29f |
+ blt Lctr32_enc8x_five
|
|
 |
83c29f |
+ nop
|
|
 |
83c29f |
+ beq Lctr32_enc8x_six
|
|
 |
83c29f |
+ cmpwi $len,0x00
|
|
 |
83c29f |
+ blt Lctr32_enc8x_seven
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+Lctr32_enc8x_eight:
|
|
 |
83c29f |
+ vcipherlast $out0,$out0,$in0
|
|
 |
83c29f |
+ vcipherlast $out1,$out1,$in1
|
|
 |
83c29f |
+ vcipherlast $out2,$out2,$in2
|
|
 |
83c29f |
+ vcipherlast $out3,$out3,$in3
|
|
 |
83c29f |
+ vcipherlast $out4,$out4,$in4
|
|
 |
83c29f |
+ vcipherlast $out5,$out5,$in5
|
|
 |
83c29f |
+ vcipherlast $out6,$out6,$in6
|
|
 |
83c29f |
+ vcipherlast $out7,$out7,$in7
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?vperm $out0,$out0,$out0,$inpperm
|
|
 |
83c29f |
+ le?vperm $out1,$out1,$out1,$inpperm
|
|
 |
83c29f |
+ stvx_u $out0,$x00,$out
|
|
 |
83c29f |
+ le?vperm $out2,$out2,$out2,$inpperm
|
|
 |
83c29f |
+ stvx_u $out1,$x10,$out
|
|
 |
83c29f |
+ le?vperm $out3,$out3,$out3,$inpperm
|
|
 |
83c29f |
+ stvx_u $out2,$x20,$out
|
|
 |
83c29f |
+ le?vperm $out4,$out4,$out4,$inpperm
|
|
 |
83c29f |
+ stvx_u $out3,$x30,$out
|
|
 |
83c29f |
+ le?vperm $out5,$out5,$out5,$inpperm
|
|
 |
83c29f |
+ stvx_u $out4,$x40,$out
|
|
 |
83c29f |
+ le?vperm $out6,$out6,$out6,$inpperm
|
|
 |
83c29f |
+ stvx_u $out5,$x50,$out
|
|
 |
83c29f |
+ le?vperm $out7,$out7,$out7,$inpperm
|
|
 |
83c29f |
+ stvx_u $out6,$x60,$out
|
|
 |
83c29f |
+ stvx_u $out7,$x70,$out
|
|
 |
83c29f |
+ addi $out,$out,0x80
|
|
 |
83c29f |
+ b Lctr32_enc8x_done
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+.align 5
|
|
 |
83c29f |
+Lctr32_enc8x_seven:
|
|
 |
83c29f |
+ vcipherlast $out0,$out0,$in1
|
|
 |
83c29f |
+ vcipherlast $out1,$out1,$in2
|
|
 |
83c29f |
+ vcipherlast $out2,$out2,$in3
|
|
 |
83c29f |
+ vcipherlast $out3,$out3,$in4
|
|
 |
83c29f |
+ vcipherlast $out4,$out4,$in5
|
|
 |
83c29f |
+ vcipherlast $out5,$out5,$in6
|
|
 |
83c29f |
+ vcipherlast $out6,$out6,$in7
|
|
 |
83c29f |
+
|
|
 |
83c29f |
+ le?vperm $out0,$out0,$out0,$inpperm
|
|
 |
83c29f |
+ le?vperm $out1,$out1,$out1,$inpperm
|
|
 |
83c29f |
+ stvx_u $out0,$x00,$out
|
|
 |
83c29f |
+ le?vperm $out2,$out2,$out2,$inpperm
|
|
 |
83c29f |
+ stvx_u $out1,$x10,$out
|
|