From 83c29f9ce7ace5691126ec2556b9b73af85361f0 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Mar 05 2015 13:24:32 +0000 Subject: import openssl-1.0.1e-42.el7 --- diff --git a/SOURCES/openssl-1.0.1-beta2-rpmbuild.patch b/SOURCES/openssl-1.0.1-beta2-rpmbuild.patch deleted file mode 100644 index a4bb691..0000000 --- a/SOURCES/openssl-1.0.1-beta2-rpmbuild.patch +++ /dev/null @@ -1,110 +0,0 @@ -diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure ---- openssl-1.0.1-beta2/Configure.rpmbuild 2012-01-05 01:07:34.000000000 +0100 -+++ openssl-1.0.1-beta2/Configure 2012-02-02 12:43:56.547409325 +0100 -@@ -343,23 +343,23 @@ my %table=( - #### - # *-generic* is endian-neutral target, but ./config is free to - # throw in -D[BL]_ENDIAN, whichever appropriate... --"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", --"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -+"linux-generic32","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", -+"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", - # It's believed that majority of ARM toolchains predefine appropriate -march. - # If you compiler does not, do complement config command line with one! --"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -+"linux-armv4", "gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", - #### IA-32 targets... 
- "linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", --"linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -+"linux-elf", "gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", - "linux-aout", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -march=i486 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:a.out", - #### --"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", --"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", --"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -+"linux-generic64","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", -+"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", -+"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", - "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", - "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", --"linux-x86_64", "gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", --"linux64-s390x", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", -+"linux-x86_64", "gcc:-m64 -DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", -+"linux64-s390x", "gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", - #### So called "highgprs" target for 
z/Architecture CPUs - # "Highgprs" is kernel feature first implemented in Linux 2.6.32, see - # /proc/cpuinfo. The idea is to preserve most significant bits of -@@ -373,16 +373,17 @@ my %table=( - # ldconfig and run-time linker to autodiscover. Unfortunately it - # doesn't work just yet, because of couple of bugs in glibc - # sysdeps/s390/dl-procinfo.c affecting ldconfig and ld.so.1... --"linux32-s390x", "gcc:-m31 -Wa,-mzarch -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$s390x_asm;$asm=~s/bn\-s390x\.o/bn_asm.o/;$asm}.":31:dlfcn:linux-shared:-fPIC:-m31:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/highgprs", -+"linux32-s390x", "gcc:-m31 -Wa,-mzarch -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$s390x_asm;$asm=~s/bn\-s390x\.o/bn_asm.o/;$asm}.":31:dlfcn:linux-shared:-fPIC:-m31 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::/highgprs", - #### SPARC Linux setups - # Ray Miller has patiently - # assisted with debugging of following two configs. --"linux-sparcv8","gcc:-mv8 -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -DBN_DIV2W::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -+"linux-sparcv8","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS) -DBN_DIV2W::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", - # it's a real mess with -mcpu=ultrasparc option under Linux, but - # -Wa,-Av8plus should do the trick no matter what. --"linux-sparcv9","gcc:-m32 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -Wa,-Av8plus -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -+"linux-sparcv9","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS) -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", - # GCC 3.1 is a requirement --"linux64-sparcv9","gcc:-m64 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", -+"linux64-sparcv9","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT:ULTRASPARC:-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", -+"linux-aarch64","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", - #### Alpha Linux with GNU C and Compaq C setups - # Special notes: - # - linux-alpha+bwx-gcc is ment to be used from ./config only. 
If you -@@ -396,8 +397,8 @@ my %table=( - # - # - # --"linux-alpha-gcc","gcc:-O3 -DL_ENDIAN -DTERMIO::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", --"linux-alpha+bwx-gcc","gcc:-O3 -DL_ENDIAN -DTERMIO::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -+"linux-alpha-gcc","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", -+"linux-alpha+bwx-gcc","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", - "linux-alpha-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}", - "linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}", - -@@ -1678,7 +1679,7 @@ while () - elsif ($shared_extension ne "" && $shared_extension =~ /^\.s([ol])\.[^\.]*\.[^\.]*$/) - { - my $sotmp = $1; -- s/^SHARED_LIBS_LINK_EXTS=.*/SHARED_LIBS_LINK_EXTS=.s$sotmp.\$(SHLIB_MAJOR) .s$sotmp/; -+ s/^SHARED_LIBS_LINK_EXTS=.*/SHARED_LIBS_LINK_EXTS=.s$sotmp.\$(SHLIB_SONAMEVER) .s$sotmp/; - } - elsif ($shared_extension ne "" && $shared_extension =~ /^\.[^\.]*\.[^\.]*\.dylib$/) - { -diff -up openssl-1.0.1-beta2/Makefile.org.rpmbuild openssl-1.0.1-beta2/Makefile.org ---- openssl-1.0.1-beta2/Makefile.org.rpmbuild 2011-12-27 16:17:50.000000000 +0100 -+++ openssl-1.0.1-beta2/Makefile.org 2012-02-02 12:30:23.652495435 +0100 -@@ -10,6 +10,7 @@ SHLIB_VERSION_HISTORY= - SHLIB_MAJOR= - SHLIB_MINOR= - SHLIB_EXT= -+SHLIB_SONAMEVER=10 - PLATFORM=dist - OPTIONS= - CONFIGURE_ARGS= -@@ -333,10 +334,9 @@ clean-shared: - link-shared: - @ set -e; for i in $(SHLIBDIRS); do \ - $(MAKE) -f $(HERE)/Makefile.shared -e $(BUILDENV) \ -- LIBNAME=$$i LIBVERSION=$(SHLIB_MAJOR).$(SHLIB_MINOR) \ -+ LIBNAME=$$i LIBVERSION=$(SHLIB_SONAMEVER) \ - LIBCOMPATVERSIONS=";$(SHLIB_VERSION_HISTORY)" \ - symlink.$(SHLIB_TARGET); \ -- libs="$$libs -l$$i"; \ - done - - build-shared: do_$(SHLIB_TARGET) link-shared -@@ -347,7 +347,7 @@ do_$(SHLIB_TARGET): - libs="$(LIBKRB5) $$libs"; \ - fi; \ - $(CLEARENV) && $(MAKE) -f Makefile.shared -e $(BUILDENV) \ -- LIBNAME=$$i LIBVERSION=$(SHLIB_MAJOR).$(SHLIB_MINOR) \ -+ LIBNAME=$$i LIBVERSION=$(SHLIB_SONAMEVER) \ - LIBCOMPATVERSIONS=";$(SHLIB_VERSION_HISTORY)" \ - LIBDEPS="$$libs $(EX_LIBS)" \ - link_a.$(SHLIB_TARGET); \ diff --git a/SOURCES/openssl-1.0.1e-cc-reqs.patch b/SOURCES/openssl-1.0.1e-cc-reqs.patch new file mode 100644 index 0000000..e266bb1 --- /dev/null +++ b/SOURCES/openssl-1.0.1e-cc-reqs.patch @@ -0,0 +1,25 @@ +diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.cc-reqs openssl-1.0.1e/crypto/rsa/rsa_gen.c +--- openssl-1.0.1e/crypto/rsa/rsa_gen.c.cc-reqs 2015-01-13 12:45:51.000000000 +0100 ++++ openssl-1.0.1e/crypto/rsa/rsa_gen.c 2015-01-15 17:35:04.649697922 +0100 +@@ -438,6 +438,10 @@ static int rsa_builtin_keygen(RSA *rsa, + if(!rsa->dmq1 && ((rsa->dmq1=BN_new()) == NULL)) goto err; + if(!rsa->iqmp && ((rsa->iqmp=BN_new()) == NULL)) goto err; + ++ /* prepare minimum p and q difference 
*/ ++ if (!BN_one(r3)) goto err; ++ if (bitsp > 100 && !BN_lshift(r3, r3, bitsp - 100)) goto err; ++ + BN_copy(rsa->e, e_value); + + /* generate p and q */ +@@ -463,7 +467,9 @@ static int rsa_builtin_keygen(RSA *rsa, + { + if(!BN_generate_prime_ex(rsa->q, bitsq, 0, NULL, NULL, cb)) + goto err; +- } while((BN_cmp(rsa->p, rsa->q) == 0) && (++degenerate < 3)); ++ if (!BN_sub(r2, rsa->q, rsa->p)) ++ goto err; ++ } while((BN_ucmp(r2, r3) <= 0) && (++degenerate < 3)); + if(degenerate == 3) + { + ok = 0; /* we set our own err */ diff --git a/SOURCES/openssl-1.0.1e-copy-algo.patch b/SOURCES/openssl-1.0.1e-copy-algo.patch new file mode 100644 index 0000000..927c584 --- /dev/null +++ b/SOURCES/openssl-1.0.1e-copy-algo.patch @@ -0,0 +1,33 @@ +diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c +index 6a33b9d..76a5f9e 100644 +--- a/ssl/ssl_lib.c ++++ b/ssl/ssl_lib.c +@@ -3177,15 +3177,26 @@ SSL_CTX *SSL_get_SSL_CTX(const SSL *ssl) + + SSL_CTX *SSL_set_SSL_CTX(SSL *ssl, SSL_CTX* ctx) + { ++ CERT *ocert = ssl->cert; + if (ssl->ctx == ctx) + return ssl->ctx; + #ifndef OPENSSL_NO_TLSEXT + if (ctx == NULL) + ctx = ssl->initial_ctx; + #endif +- if (ssl->cert != NULL) +- ssl_cert_free(ssl->cert); + ssl->cert = ssl_cert_dup(ctx->cert); ++ if (ocert) ++ { ++ int i; ++ /* Copy negotiated digests from original */ ++ for (i = 0; i < SSL_PKEY_NUM; i++) ++ { ++ CERT_PKEY *cpk = ocert->pkeys + i; ++ CERT_PKEY *rpk = ssl->cert->pkeys + i; ++ rpk->digest = cpk->digest; ++ } ++ ssl_cert_free(ocert); ++ } + CRYPTO_add(&ctx->references,1,CRYPTO_LOCK_SSL_CTX); + if (ssl->ctx != NULL) + SSL_CTX_free(ssl->ctx); /* decrement reference count */ diff --git a/SOURCES/openssl-1.0.1e-cve-2014-0224.patch b/SOURCES/openssl-1.0.1e-cve-2014-0224.patch index 173f0e1..05e7e79 100644 --- a/SOURCES/openssl-1.0.1e-cve-2014-0224.patch +++ b/SOURCES/openssl-1.0.1e-cve-2014-0224.patch @@ -12,7 +12,15 @@ diff -up openssl-1.0.1e/ssl/ssl3.h.keying-mitm openssl-1.0.1e/ssl/ssl3.h diff -up openssl-1.0.1e/ssl/s3_clnt.c.keying-mitm openssl-1.0.1e/ssl/s3_clnt.c --- openssl-1.0.1e/ssl/s3_clnt.c.keying-mitm 2013-02-11 16:26:04.000000000 +0100 +++ openssl-1.0.1e/ssl/s3_clnt.c 2014-06-02 19:49:57.042701985 +0200 -@@ -559,6 +559,7 @@ int ssl3_connect(SSL *s) +@@ -510,6 +510,7 @@ int ssl3_connect(SSL *s) + s->method->ssl3_enc->client_finished_label, + s->method->ssl3_enc->client_finished_label_len); + if (ret <= 0) goto end; ++ s->s3->flags |= SSL3_FLAGS_CCS_OK; + s->state=SSL3_ST_CW_FLUSH; + + /* clear flags */ +@@ -559,6 +560,7 @@ int ssl3_connect(SSL *s) case SSL3_ST_CR_FINISHED_A: case SSL3_ST_CR_FINISHED_B: @@ -20,7 +28,15 @@ diff -up openssl-1.0.1e/ssl/s3_clnt.c.keying-mitm openssl-1.0.1e/ssl/s3_clnt.c ret=ssl3_get_finished(s,SSL3_ST_CR_FINISHED_A, SSL3_ST_CR_FINISHED_B); if (ret <= 0) goto end; -@@ -916,6 +917,7 @@ int ssl3_get_server_hello(SSL *s) +@@ -901,6 +903,7 @@ int ssl3_get_server_hello(SSL *s) + { + s->session->cipher = pref_cipher ? 
+ pref_cipher : ssl_get_cipher_by_char(s, p+j); ++ s->s3->flags |= SSL3_FLAGS_CCS_OK; + } + } + #endif /* OPENSSL_NO_TLSEXT */ +@@ -916,6 +918,7 @@ int ssl3_get_server_hello(SSL *s) SSLerr(SSL_F_SSL3_GET_SERVER_HELLO,SSL_R_ATTEMPT_TO_REUSE_SESSION_IN_DIFFERENT_CONTEXT); goto f_err; } diff --git a/SOURCES/openssl-1.0.1e-doc-ciphersuites.patch b/SOURCES/openssl-1.0.1e-doc-ciphersuites.patch new file mode 100644 index 0000000..418b9e1 --- /dev/null +++ b/SOURCES/openssl-1.0.1e-doc-ciphersuites.patch @@ -0,0 +1,304 @@ +From 87887a7a658bf305bfe6619eedcbc6c3972cc188 Mon Sep 17 00:00:00 2001 +From: Hubert Kario +Date: Tue, 10 Jun 2014 14:13:33 +0200 +Subject: [PATCH] backport changes to ciphers(1) man page + +Backport of the patch: +add ECC strings to ciphers(1), point out difference between DH and ECDH +and few other changes applicable to the 1.0.1 code base. + + * Make a clear distinction between DH and ECDH key exchange. + * Group all key exchange cipher suite identifiers, first DH then ECDH + * add descriptions for all supported *DH* identifiers + * add ECDSA authentication descriptions + * add example showing how to disable all suites that offer no + authentication or encryption + * backport listing of elliptic curve cipher suites. + * backport listing of TLS 1.2 cipher suites, add note that DH_RSA + and DH_DSS is not implemented in this version + * backport of description of PSK and listing of PSK cipher suites + * backport description of AES128, AES256 and AESGCM options + * backport description of CAMELLIA128, CAMELLIA256 options +--- + doc/apps/ciphers.pod | 195 ++++++++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 173 insertions(+), 22 deletions(-) + +diff --git a/doc/apps/ciphers.pod b/doc/apps/ciphers.pod +index f44aa00..6086d0a 100644 +--- a/doc/apps/ciphers.pod ++++ b/doc/apps/ciphers.pod +@@ -36,7 +36,7 @@ SSL v2 and for SSL v3/TLS v1. + + =item B<-V> + +-Like B<-V>, but include cipher suite codes in output (hex format). ++Like B<-v>, but include cipher suite codes in output (hex format). + + =item B<-ssl3> + +@@ -116,8 +116,8 @@ specified. + =item B + + the ciphers included in B, but not enabled by default. Currently +-this is B. Note that this rule does not cover B, which is +-not included by B (use B if necessary). ++this is B and B. Note that this rule does not cover B, ++which is not included by B (use B if necessary). + + =item B + +@@ -165,21 +165,58 @@ included. + =item B + + the cipher suites offering no authentication. This is currently the anonymous +-DH algorithms. These cipher suites are vulnerable to a "man in the middle" +-attack and so their use is normally discouraged. ++DH algorithms and anonymous ECDH algorithms. These cipher suites are vulnerable ++to a "man in the middle" attack and so their use is normally discouraged. + + =item B, B + + cipher suites using RSA key exchange. + ++=item B, B, B ++ ++cipher suites using DH key agreement and DH certificates signed by CAs with RSA ++and DSS keys or either respectively. Not implemented. ++ + =item B + +-cipher suites using ephemeral DH key agreement. ++cipher suites using ephemeral DH key agreement, including anonymous cipher ++suites. + +-=item B, B ++=item B + +-cipher suites using DH key agreement and DH certificates signed by CAs with RSA +-and DSS keys respectively. Not implemented. ++cipher suites using authenticated ephemeral DH key agreement. ++ ++=item B ++ ++anonymous DH cipher suites, note that this does not include anonymous Elliptic ++Curve DH (ECDH) cipher suites. 
++ ++=item B ++ ++cipher suites using DH, including anonymous DH, ephemeral DH and fixed DH. ++ ++=item B, B, B ++ ++cipher suites using fixed ECDH key agreement signed by CAs with RSA and ECDSA ++keys or either respectively. ++ ++=item B ++ ++cipher suites using ephemeral ECDH key agreement, including anonymous ++cipher suites. ++ ++=item B ++ ++cipher suites using authenticated ephemeral ECDH key agreement. ++ ++=item B ++ ++anonymous Elliptic Curve Diffie Hellman cipher suites. ++ ++=item B ++ ++cipher suites using ECDH key exchange, including anonymous, ephemeral and ++fixed ECDH. + + =item B + +@@ -194,30 +231,39 @@ cipher suites using DSS authentication, i.e. the certificates carry DSS keys. + cipher suites effectively using DH authentication, i.e. the certificates carry + DH keys. Not implemented. + ++=item B ++ ++cipher suites effectively using ECDH authentication, i.e. the certificates ++carry ECDH keys. ++ ++=item B, B ++ ++cipher suites using ECDSA authentication, i.e. the certificates carry ECDSA ++keys. ++ + =item B, B, B, B + + ciphers suites using FORTEZZA key exchange, authentication, encryption or all + FORTEZZA algorithms. Not implemented. + +-=item B, B, B +- +-TLS v1.0, SSL v3.0 or SSL v2.0 cipher suites respectively. ++=item B, B, B, B + +-=item B +- +-cipher suites using DH, including anonymous DH. ++TLS v1.2, TLS v1.0, SSL v3.0 or SSL v2.0 cipher suites respectively. Note: ++there are no ciphersuites specific to TLS v1.1. + +-=item B ++=item B, B, B + +-anonymous DH cipher suites. ++cipher suites using 128 bit AES, 256 bit AES or either 128 or 256 bit AES. + +-=item B ++=item B + +-cipher suites using AES. ++AES in Galois Counter Mode (GCM): these ciphersuites are only supported ++in TLS v1.2. + +-=item B ++=item B, B, B + +-cipher suites using Camellia. ++cipher suites using 128 bit CAMELLIA, 256 bit CAMELLIA or either 128 or 256 bit ++CAMELLIA. + + =item B<3DES> + +@@ -251,6 +297,10 @@ cipher suites using MD5. + + cipher suites using SHA1. + ++=item B, B ++ ++ciphersuites using SHA256 or SHA384. ++ + =item B + + cipher suites using GOST R 34.10 (either 2001 or 94) for authenticaction +@@ -277,6 +327,9 @@ cipher suites, using HMAC based on GOST R 34.11-94. + + cipher suites using GOST 28147-89 MAC B HMAC. + ++=item B ++ ++cipher suites using pre-shared keys (PSK). + + =back + +@@ -423,7 +476,100 @@ Note: these ciphers can also be used in SSL v3. + TLS_DHE_DSS_EXPORT1024_WITH_RC4_56_SHA EXP1024-DHE-DSS-RC4-SHA + TLS_DHE_DSS_WITH_RC4_128_SHA DHE-DSS-RC4-SHA + +-=head2 SSL v2.0 cipher suites. ++=head2 Elliptic curve cipher suites. 
++ ++ TLS_ECDH_RSA_WITH_NULL_SHA ECDH-RSA-NULL-SHA ++ TLS_ECDH_RSA_WITH_RC4_128_SHA ECDH-RSA-RC4-SHA ++ TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA ECDH-RSA-DES-CBC3-SHA ++ TLS_ECDH_RSA_WITH_AES_128_CBC_SHA ECDH-RSA-AES128-SHA ++ TLS_ECDH_RSA_WITH_AES_256_CBC_SHA ECDH-RSA-AES256-SHA ++ ++ TLS_ECDH_ECDSA_WITH_NULL_SHA ECDH-ECDSA-NULL-SHA ++ TLS_ECDH_ECDSA_WITH_RC4_128_SHA ECDH-ECDSA-RC4-SHA ++ TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA ECDH-ECDSA-DES-CBC3-SHA ++ TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA ECDH-ECDSA-AES128-SHA ++ TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA ECDH-ECDSA-AES256-SHA ++ ++ TLS_ECDHE_RSA_WITH_NULL_SHA ECDHE-RSA-NULL-SHA ++ TLS_ECDHE_RSA_WITH_RC4_128_SHA ECDHE-RSA-RC4-SHA ++ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA ECDHE-RSA-DES-CBC3-SHA ++ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA ECDHE-RSA-AES128-SHA ++ TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA ECDHE-RSA-AES256-SHA ++ ++ TLS_ECDHE_ECDSA_WITH_NULL_SHA ECDHE-ECDSA-NULL-SHA ++ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA ECDHE-ECDSA-RC4-SHA ++ TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA ECDHE-ECDSA-DES-CBC3-SHA ++ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA ECDHE-ECDSA-AES128-SHA ++ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA ECDHE-ECDSA-AES256-SHA ++ ++ TLS_ECDH_anon_WITH_NULL_SHA AECDH-NULL-SHA ++ TLS_ECDH_anon_WITH_RC4_128_SHA AECDH-RC4-SHA ++ TLS_ECDH_anon_WITH_3DES_EDE_CBC_SHA AECDH-DES-CBC3-SHA ++ TLS_ECDH_anon_WITH_AES_128_CBC_SHA AECDH-AES128-SHA ++ TLS_ECDH_anon_WITH_AES_256_CBC_SHA AECDH-AES256-SHA ++ ++=head2 TLS v1.2 cipher suites ++ ++ TLS_RSA_WITH_NULL_SHA256 NULL-SHA256 ++ ++ TLS_RSA_WITH_AES_128_CBC_SHA256 AES128-SHA256 ++ TLS_RSA_WITH_AES_256_CBC_SHA256 AES256-SHA256 ++ TLS_RSA_WITH_AES_128_GCM_SHA256 AES128-GCM-SHA256 ++ TLS_RSA_WITH_AES_256_GCM_SHA384 AES256-GCM-SHA384 ++ ++ TLS_DH_RSA_WITH_AES_128_CBC_SHA256 Not implemented. ++ TLS_DH_RSA_WITH_AES_256_CBC_SHA256 Not implemented. ++ TLS_DH_RSA_WITH_AES_128_GCM_SHA256 Not implemented. ++ TLS_DH_RSA_WITH_AES_256_GCM_SHA384 Not implemented. ++ ++ TLS_DH_DSS_WITH_AES_128_CBC_SHA256 Not implemented. ++ TLS_DH_DSS_WITH_AES_256_CBC_SHA256 Not implemented. ++ TLS_DH_DSS_WITH_AES_128_GCM_SHA256 Not implemented. ++ TLS_DH_DSS_WITH_AES_256_GCM_SHA384 Not implemented. 
++ ++ TLS_DHE_RSA_WITH_AES_128_CBC_SHA256 DHE-RSA-AES128-SHA256 ++ TLS_DHE_RSA_WITH_AES_256_CBC_SHA256 DHE-RSA-AES256-SHA256 ++ TLS_DHE_RSA_WITH_AES_128_GCM_SHA256 DHE-RSA-AES128-GCM-SHA256 ++ TLS_DHE_RSA_WITH_AES_256_GCM_SHA384 DHE-RSA-AES256-GCM-SHA384 ++ ++ TLS_DHE_DSS_WITH_AES_128_CBC_SHA256 DHE-DSS-AES128-SHA256 ++ TLS_DHE_DSS_WITH_AES_256_CBC_SHA256 DHE-DSS-AES256-SHA256 ++ TLS_DHE_DSS_WITH_AES_128_GCM_SHA256 DHE-DSS-AES128-GCM-SHA256 ++ TLS_DHE_DSS_WITH_AES_256_GCM_SHA384 DHE-DSS-AES256-GCM-SHA384 ++ ++ TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256 ECDH-RSA-AES128-SHA256 ++ TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384 ECDH-RSA-AES256-SHA384 ++ TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256 ECDH-RSA-AES128-GCM-SHA256 ++ TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384 ECDH-RSA-AES256-GCM-SHA384 ++ ++ TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256 ECDH-ECDSA-AES128-SHA256 ++ TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384 ECDH-ECDSA-AES256-SHA384 ++ TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256 ECDH-ECDSA-AES128-GCM-SHA256 ++ TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384 ECDH-ECDSA-AES256-GCM-SHA384 ++ ++ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 ECDHE-RSA-AES128-SHA256 ++ TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384 ECDHE-RSA-AES256-SHA384 ++ TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 ECDHE-RSA-AES128-GCM-SHA256 ++ TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 ECDHE-RSA-AES256-GCM-SHA384 ++ ++ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 ECDHE-ECDSA-AES128-SHA256 ++ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 ECDHE-ECDSA-AES256-SHA384 ++ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 ECDHE-ECDSA-AES128-GCM-SHA256 ++ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 ECDHE-ECDSA-AES256-GCM-SHA384 ++ ++ TLS_DH_anon_WITH_AES_128_CBC_SHA256 ADH-AES128-SHA256 ++ TLS_DH_anon_WITH_AES_256_CBC_SHA256 ADH-AES256-SHA256 ++ TLS_DH_anon_WITH_AES_128_GCM_SHA256 ADH-AES128-GCM-SHA256 ++ TLS_DH_anon_WITH_AES_256_GCM_SHA384 ADH-AES256-GCM-SHA384 ++ ++=head2 Pre shared keying (PSK) cipheruites ++ ++ TLS_PSK_WITH_RC4_128_SHA PSK-RC4-SHA ++ TLS_PSK_WITH_3DES_EDE_CBC_SHA PSK-3DES-EDE-CBC-SHA ++ TLS_PSK_WITH_AES_128_CBC_SHA PSK-AES128-CBC-SHA ++ TLS_PSK_WITH_AES_256_CBC_SHA PSK-AES256-CBC-SHA ++ ++=head2 Deprecated SSL v2.0 cipher suites. + + SSL_CK_RC4_128_WITH_MD5 RC4-MD5 + SSL_CK_RC4_128_EXPORT40_WITH_MD5 EXP-RC4-MD5 +@@ -452,6 +598,11 @@ strength: + + openssl ciphers -v 'ALL:!ADH:@STRENGTH' + ++Include all ciphers except ones with no encryption (eNULL) or no ++authentication (aNULL): ++ ++ openssl ciphers -v 'ALL:!aNULL' ++ + Include only 3DES ciphers and then place RSA ciphers last: + + openssl ciphers -v '3DES:+RSA' +-- +1.7.9.5 + diff --git a/SOURCES/openssl-1.0.1e-dtls-ecc-ext.patch b/SOURCES/openssl-1.0.1e-dtls-ecc-ext.patch new file mode 100644 index 0000000..2a002cc --- /dev/null +++ b/SOURCES/openssl-1.0.1e-dtls-ecc-ext.patch @@ -0,0 +1,119 @@ +From 2054eb771ea29378f90d3a77c2f4015b17de702d Mon Sep 17 00:00:00 2001 +From: "Dr. Stephen Henson" +Date: Tue, 15 Jul 2014 12:20:30 +0100 +Subject: [PATCH] Add ECC extensions with DTLS. 
+ +PR#3449 +--- + ssl/d1_clnt.c | 8 +++++++- + ssl/d1_srvr.c | 5 +++++ + ssl/t1_lib.c | 18 ++++++------------ + 3 files changed, 18 insertions(+), 13 deletions(-) + +diff --git a/ssl/d1_clnt.c b/ssl/d1_clnt.c +index 48e5e06..65dbb4a 100644 +--- a/ssl/d1_clnt.c ++++ b/ssl/d1_clnt.c +@@ -876,12 +876,18 @@ int dtls1_client_hello(SSL *s) + *(p++)=0; /* Add the NULL method */ + + #ifndef OPENSSL_NO_TLSEXT ++ /* TLS extensions*/ ++ if (ssl_prepare_clienthello_tlsext(s) <= 0) ++ { ++ SSLerr(SSL_F_DTLS1_CLIENT_HELLO,SSL_R_CLIENTHELLO_TLSEXT); ++ goto err; ++ } + if ((p = ssl_add_clienthello_tlsext(s, p, buf+SSL3_RT_MAX_PLAIN_LENGTH)) == NULL) + { + SSLerr(SSL_F_DTLS1_CLIENT_HELLO,ERR_R_INTERNAL_ERROR); + goto err; + } +-#endif ++#endif + + l=(p-d); + d=buf; +diff --git a/ssl/d1_srvr.c b/ssl/d1_srvr.c +index 1384ab0..ef9c347 100644 +--- a/ssl/d1_srvr.c ++++ b/ssl/d1_srvr.c +@@ -980,6 +980,11 @@ int dtls1_send_server_hello(SSL *s) + #endif + + #ifndef OPENSSL_NO_TLSEXT ++ if (ssl_prepare_serverhello_tlsext(s) <= 0) ++ { ++ SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO,SSL_R_SERVERHELLO_TLSEXT); ++ return -1; ++ } + if ((p = ssl_add_serverhello_tlsext(s, p, buf+SSL3_RT_MAX_PLAIN_LENGTH)) == NULL) + { + SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO,ERR_R_INTERNAL_ERROR); +diff --git a/ssl/t1_lib.c b/ssl/t1_lib.c +index f6a480d..8167a51 100644 +--- a/ssl/t1_lib.c ++++ b/ssl/t1_lib.c +@@ -453,8 +453,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, unsigned c + #endif + + #ifndef OPENSSL_NO_EC +- if (s->tlsext_ecpointformatlist != NULL && +- s->version != DTLS1_VERSION) ++ if (s->tlsext_ecpointformatlist != NULL) + { + /* Add TLS extension ECPointFormats to the ClientHello message */ + long lenmax; +@@ -473,8 +472,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, unsigned c + memcpy(ret, s->tlsext_ecpointformatlist, s->tlsext_ecpointformatlist_length); + ret+=s->tlsext_ecpointformatlist_length; + } +- if (s->tlsext_ellipticcurvelist != NULL && +- s->version != DTLS1_VERSION) ++ if (s->tlsext_ellipticcurvelist != NULL) + { + /* Add TLS extension EllipticCurves to the ClientHello message */ + long lenmax; +@@ -750,8 +748,7 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, unsigned c + } + + #ifndef OPENSSL_NO_EC +- if (s->tlsext_ecpointformatlist != NULL && +- s->version != DTLS1_VERSION) ++ if (s->tlsext_ecpointformatlist != NULL) + { + /* Add TLS extension ECPointFormats to the ServerHello message */ + long lenmax; +@@ -1154,8 +1151,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in + #endif + + #ifndef OPENSSL_NO_EC +- else if (type == TLSEXT_TYPE_ec_point_formats && +- s->version != DTLS1_VERSION) ++ else if (type == TLSEXT_TYPE_ec_point_formats) + { + unsigned char *sdata = data; + int ecpointformatlist_length = *(sdata++); +@@ -1189,8 +1185,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in + fprintf(stderr,"\n"); + #endif + } +- else if (type == TLSEXT_TYPE_elliptic_curves && +- s->version != DTLS1_VERSION) ++ else if (type == TLSEXT_TYPE_elliptic_curves) + { + unsigned char *sdata = data; + int ellipticcurvelist_length = (*(sdata++) << 8); +@@ -1549,8 +1544,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in + } + + #ifndef OPENSSL_NO_EC +- else if (type == TLSEXT_TYPE_ec_point_formats && +- s->version != DTLS1_VERSION) ++ else if (type == TLSEXT_TYPE_ec_point_formats) + { + unsigned char *sdata = data; + int ecpointformatlist_length = 
*(sdata++); +-- +1.8.3.1 + diff --git a/SOURCES/openssl-1.0.1e-ecdh-auto.patch b/SOURCES/openssl-1.0.1e-ecdh-auto.patch new file mode 100644 index 0000000..2e6ac57 --- /dev/null +++ b/SOURCES/openssl-1.0.1e-ecdh-auto.patch @@ -0,0 +1,248 @@ +diff -up openssl-1.0.1e/apps/s_server.c.ecdh-auto openssl-1.0.1e/apps/s_server.c +--- openssl-1.0.1e/apps/s_server.c.ecdh-auto 2014-09-17 15:52:01.659445244 +0200 ++++ openssl-1.0.1e/apps/s_server.c 2014-09-17 16:24:44.378754502 +0200 +@@ -1708,7 +1708,7 @@ bad: + { + EC_KEY *ecdh=NULL; + +- if (named_curve) ++ if (named_curve && strcmp(named_curve, "auto")) + { + int nid = OBJ_sn2nid(named_curve); + +@@ -1731,6 +1731,8 @@ bad: + { + BIO_printf(bio_s_out,"Setting temp ECDH parameters\n"); + } ++ else if (named_curve) ++ SSL_CTX_set_ecdh_auto(ctx, 1); + else + { + BIO_printf(bio_s_out,"Using default temp ECDH parameters\n"); +diff -up openssl-1.0.1e/ssl/ssl_cert.c.ecdh-auto openssl-1.0.1e/ssl/ssl_cert.c +--- openssl-1.0.1e/ssl/ssl_cert.c.ecdh-auto 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/ssl/ssl_cert.c 2014-09-17 16:20:24.355884360 +0200 +@@ -270,6 +270,7 @@ CERT *ssl_cert_dup(CERT *cert) + } + } + ret->ecdh_tmp_cb = cert->ecdh_tmp_cb; ++ ret->ecdh_tmp_auto = cert->ecdh_tmp_auto; + #endif + + for (i = 0; i < SSL_PKEY_NUM; i++) +diff -up openssl-1.0.1e/ssl/ssl.h.ecdh-auto openssl-1.0.1e/ssl/ssl.h +--- openssl-1.0.1e/ssl/ssl.h.ecdh-auto 2014-09-17 16:20:24.354884336 +0200 ++++ openssl-1.0.1e/ssl/ssl.h 2014-09-17 16:49:29.135273514 +0200 +@@ -1563,6 +1563,7 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) + #define SSL_CTRL_GET_EXTRA_CHAIN_CERTS 82 + #define SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS 83 + ++#define SSL_CTRL_SET_ECDH_AUTO 94 + #define SSL_CTRL_GET_SERVER_TMP_KEY 109 + + #define DTLSv1_get_timeout(ssl, arg) \ +@@ -1606,6 +1607,11 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) + #define SSL_CTX_clear_extra_chain_certs(ctx) \ + SSL_CTX_ctrl(ctx,SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS,0,NULL) + ++#define SSL_CTX_set_ecdh_auto(ctx, onoff) \ ++ SSL_CTX_ctrl(ctx,SSL_CTRL_SET_ECDH_AUTO,onoff,NULL) ++#define SSL_set_ecdh_auto(s, onoff) \ ++ SSL_ctrl(s,SSL_CTRL_SET_ECDH_AUTO,onoff,NULL) ++ + #define SSL_get_server_tmp_key(s, pk) \ + SSL_ctrl(s,SSL_CTRL_GET_SERVER_TMP_KEY,0,pk) + +diff -up openssl-1.0.1e/ssl/ssl_lib.c.ecdh-auto openssl-1.0.1e/ssl/ssl_lib.c +--- openssl-1.0.1e/ssl/ssl_lib.c.ecdh-auto 2014-09-17 15:52:01.616444274 +0200 ++++ openssl-1.0.1e/ssl/ssl_lib.c 2014-09-17 16:20:24.356884383 +0200 +@@ -2045,7 +2045,7 @@ void ssl_set_cert_masks(CERT *c, const S + #endif + + #ifndef OPENSSL_NO_ECDH +- have_ecdh_tmp=(c->ecdh_tmp != NULL || c->ecdh_tmp_cb != NULL); ++ have_ecdh_tmp=(c->ecdh_tmp || c->ecdh_tmp_cb || c->ecdh_tmp_auto); + #endif + cpk= &(c->pkeys[SSL_PKEY_RSA_ENC]); + rsa_enc= (cpk->x509 != NULL && cpk->privatekey != NULL); +diff -up openssl-1.0.1e/ssl/ssl_locl.h.ecdh-auto openssl-1.0.1e/ssl/ssl_locl.h +--- openssl-1.0.1e/ssl/ssl_locl.h.ecdh-auto 2014-09-17 15:52:01.632444635 +0200 ++++ openssl-1.0.1e/ssl/ssl_locl.h 2014-09-17 17:26:29.764405189 +0200 +@@ -511,6 +511,8 @@ typedef struct cert_st + EC_KEY *ecdh_tmp; + /* Callback for generating ephemeral ECDH keys */ + EC_KEY *(*ecdh_tmp_cb)(SSL *ssl,int is_export,int keysize); ++ /* Select ECDH parameters automatically */ ++ int ecdh_tmp_auto; + #endif + + CERT_PKEY pkeys[SSL_PKEY_NUM]; +@@ -1091,6 +1093,7 @@ SSL_COMP *ssl3_comp_find(STACK_OF(SSL_CO + #ifndef OPENSSL_NO_EC + int tls1_ec_curve_id2nid(int curve_id); + int tls1_ec_nid2curve_id(int nid); ++int tls1_shared_curve(SSL *s, int nmatch); + 
#endif /* OPENSSL_NO_EC */ + + #ifndef OPENSSL_NO_TLSEXT +diff -up openssl-1.0.1e/ssl/s3_lib.c.ecdh-auto openssl-1.0.1e/ssl/s3_lib.c +--- openssl-1.0.1e/ssl/s3_lib.c.ecdh-auto 2014-09-17 16:20:24.352884288 +0200 ++++ openssl-1.0.1e/ssl/s3_lib.c 2014-09-17 17:37:26.274226185 +0200 +@@ -3350,6 +3350,12 @@ long ssl3_ctrl(SSL *s, int cmd, long lar + #endif + + #endif /* !OPENSSL_NO_TLSEXT */ ++ ++#ifndef OPENSSL_NO_EC ++ case SSL_CTRL_SET_ECDH_AUTO: ++ s->cert->ecdh_tmp_auto = larg; ++ return 1; ++#endif + case SSL_CTRL_GET_SERVER_TMP_KEY: + if (s->server || !s->session || !s->session->sess_cert) + return 0; +@@ -3651,6 +3657,12 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd + ctx->srp_ctx.strength=larg; + break; + #endif ++ ++#ifndef OPENSSL_NO_EC ++ case SSL_CTRL_SET_ECDH_AUTO: ++ ctx->cert->ecdh_tmp_auto = larg; ++ return 1; ++#endif + #endif /* !OPENSSL_NO_TLSEXT */ + + /* A Thawte special :-) */ +@@ -4003,6 +4015,14 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, S + if ( + /* if we are considering an ECC cipher suite that uses an ephemeral EC key */ + (alg_k & SSL_kEECDH) ++ && (s->cert->ecdh_tmp_auto) ++ ) ++ { ++ ok = ok && tls1_shared_curve(s, 0); ++ } ++ else if ( ++ /* if we are considering an ECC cipher suite that uses an ephemeral EC key */ ++ (alg_k & SSL_kEECDH) + /* and we have an ephemeral EC key */ + && (s->cert->ecdh_tmp != NULL) + /* and the client specified an EllipticCurves extension */ +diff -up openssl-1.0.1e/ssl/s3_srvr.c.ecdh-auto openssl-1.0.1e/ssl/s3_srvr.c +--- openssl-1.0.1e/ssl/s3_srvr.c.ecdh-auto 2014-09-17 15:52:01.644444906 +0200 ++++ openssl-1.0.1e/ssl/s3_srvr.c 2014-09-17 16:20:24.353884312 +0200 +@@ -1693,7 +1693,14 @@ int ssl3_send_server_key_exchange(SSL *s + const EC_GROUP *group; + + ecdhp=cert->ecdh_tmp; +- if ((ecdhp == NULL) && (s->cert->ecdh_tmp_cb != NULL)) ++ if (s->cert->ecdh_tmp_auto) ++ { ++ /* Get NID of first shared curve */ ++ int nid = tls1_shared_curve(s, 0); ++ if (nid != NID_undef) ++ ecdhp = EC_KEY_new_by_curve_name(nid); ++ } ++ else if ((ecdhp == NULL) && s->cert->ecdh_tmp_cb) + { + ecdhp=s->cert->ecdh_tmp_cb(s, + SSL_C_IS_EXPORT(s->s3->tmp.new_cipher), +@@ -1718,7 +1725,9 @@ int ssl3_send_server_key_exchange(SSL *s + SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,ERR_R_ECDH_LIB); + goto err; + } +- if ((ecdh = EC_KEY_dup(ecdhp)) == NULL) ++ if (s->cert->ecdh_tmp_auto) ++ ecdh = ecdhp; ++ else if ((ecdh = EC_KEY_dup(ecdhp)) == NULL) + { + SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,ERR_R_ECDH_LIB); + goto err; +diff -up openssl-1.0.1e/ssl/t1_lib.c.ecdh-auto openssl-1.0.1e/ssl/t1_lib.c +--- openssl-1.0.1e/ssl/t1_lib.c.ecdh-auto 2014-09-17 16:20:24.358884427 +0200 ++++ openssl-1.0.1e/ssl/t1_lib.c 2014-09-17 17:32:04.054951942 +0200 +@@ -202,6 +202,13 @@ static int nid_list[] = + NID_secp521r1 /* secp521r1 (25) */ + }; + ++static const unsigned char eccurves_default[] = ++ { ++ 0,23, /* secp256r1 (23) */ ++ 0,24, /* secp384r1 (24) */ ++ 0,25, /* secp521r1 (25) */ ++ }; ++ + static int pref_list[] = + { + NID_secp521r1, /* secp521r1 (25) */ +@@ -277,6 +284,69 @@ int tls1_ec_nid2curve_id(int nid) + return 0; + } + } ++/* Get curves list, if "sess" is set return client curves otherwise ++ * preferred list ++ */ ++static void tls1_get_curvelist(SSL *s, int sess, ++ const unsigned char **pcurves, ++ size_t *pcurveslen) ++ { ++ if (sess) ++ { ++ *pcurves = s->session->tlsext_ellipticcurvelist; ++ *pcurveslen = s->session->tlsext_ellipticcurvelist_length; ++ } ++ else ++ { ++ *pcurves = s->tlsext_ellipticcurvelist; ++ *pcurveslen = 
s->tlsext_ellipticcurvelist_length; ++ } ++ if (!*pcurves) ++ { ++ *pcurves = eccurves_default; ++ *pcurveslen = sizeof(eccurves_default); ++ } ++ } ++/* Return nth shared curve. If nmatch == -1 return number of ++ * matches. ++ */ ++ ++int tls1_shared_curve(SSL *s, int nmatch) ++ { ++ const unsigned char *pref, *supp; ++ size_t preflen, supplen, i, j; ++ int k; ++ /* Can't do anything on client side */ ++ if (s->server == 0) ++ return -1; ++ tls1_get_curvelist(s, !!(s->options & SSL_OP_CIPHER_SERVER_PREFERENCE), ++ &supp, &supplen); ++ tls1_get_curvelist(s, !(s->options & SSL_OP_CIPHER_SERVER_PREFERENCE), ++ &pref, &preflen); ++ preflen /= 2; ++ supplen /= 2; ++ k = 0; ++ for (i = 0; i < preflen; i++, pref+=2) ++ { ++ const unsigned char *tsupp = supp; ++ for (j = 0; j < supplen; j++, tsupp+=2) ++ { ++ if (pref[0] == tsupp[0] && pref[1] == tsupp[1]) ++ { ++ if (nmatch == k) ++ { ++ int id = (pref[0] << 8) | pref[1]; ++ return tls1_ec_curve_id2nid(id); ++ } ++ k++; ++ } ++ } ++ } ++ if (nmatch == -1) ++ return k; ++ return 0; ++ } ++ + #endif /* OPENSSL_NO_EC */ + + #ifndef OPENSSL_NO_TLSEXT diff --git a/SOURCES/openssl-1.0.1e-enc-fail.patch b/SOURCES/openssl-1.0.1e-enc-fail.patch new file mode 100644 index 0000000..a5a43f0 --- /dev/null +++ b/SOURCES/openssl-1.0.1e-enc-fail.patch @@ -0,0 +1,39 @@ +diff -up openssl-1.0.1e/crypto/evp/bio_enc.c.enc-fail openssl-1.0.1e/crypto/evp/bio_enc.c +--- openssl-1.0.1e/crypto/evp/bio_enc.c.enc-fail 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/crypto/evp/bio_enc.c 2014-03-04 15:21:12.185821738 +0100 +@@ -198,10 +198,15 @@ static int enc_read(BIO *b, char *out, i + } + else + { +- EVP_CipherUpdate(&(ctx->cipher), ++ if (!EVP_CipherUpdate(&(ctx->cipher), + (unsigned char *)ctx->buf,&ctx->buf_len, +- (unsigned char *)&(ctx->buf[BUF_OFFSET]),i); +- ctx->cont=1; ++ (unsigned char *)&(ctx->buf[BUF_OFFSET]),i)) ++ { ++ ctx->ok = 0; ++ ctx->cont = 0; ++ } ++ else ++ ctx->cont=1; + /* Note: it is possible for EVP_CipherUpdate to + * decrypt zero bytes because this is or looks like + * the final block: if this happens we should retry +@@ -257,9 +262,14 @@ static int enc_write(BIO *b, const char + while (inl > 0) + { + n=(inl > ENC_BLOCK_SIZE)?ENC_BLOCK_SIZE:inl; +- EVP_CipherUpdate(&(ctx->cipher), ++ if (!EVP_CipherUpdate(&(ctx->cipher), + (unsigned char *)ctx->buf,&ctx->buf_len, +- (unsigned char *)in,n); ++ (unsigned char *)in,n)) ++ { ++ BIO_copy_next_retry(b); ++ ctx->ok = 0; ++ return ret - inl; ++ } + inl-=n; + in+=n; + diff --git a/SOURCES/openssl-1.0.1e-evp-wrap.patch b/SOURCES/openssl-1.0.1e-evp-wrap.patch new file mode 100644 index 0000000..bf46f6c --- /dev/null +++ b/SOURCES/openssl-1.0.1e-evp-wrap.patch @@ -0,0 +1,1239 @@ +diff -up openssl-1.0.1e/crypto/aes/aes_wrap.c.wrap openssl-1.0.1e/crypto/aes/aes_wrap.c +--- openssl-1.0.1e/crypto/aes/aes_wrap.c.wrap 2013-02-11 16:02:47.000000000 +0100 ++++ openssl-1.0.1e/crypto/aes/aes_wrap.c 2014-09-09 16:12:25.852801573 +0200 +@@ -53,207 +53,18 @@ + + #include "cryptlib.h" + #include +-#include +- +-static const unsigned char default_iv[] = { +- 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, +-}; ++#include + + int AES_wrap_key(AES_KEY *key, const unsigned char *iv, + unsigned char *out, + const unsigned char *in, unsigned int inlen) + { +- unsigned char *A, B[16], *R; +- unsigned int i, j, t; +- if ((inlen & 0x7) || (inlen < 8)) +- return -1; +- A = B; +- t = 1; +- memcpy(out + 8, in, inlen); +- if (!iv) +- iv = default_iv; +- +- memcpy(A, iv, 8); +- +- for (j = 0; j < 6; j++) +- { +- R = out + 8; 
+- for (i = 0; i < inlen; i += 8, t++, R += 8) +- { +- memcpy(B + 8, R, 8); +- AES_encrypt(B, B, key); +- A[7] ^= (unsigned char)(t & 0xff); +- if (t > 0xff) +- { +- A[6] ^= (unsigned char)((t >> 8) & 0xff); +- A[5] ^= (unsigned char)((t >> 16) & 0xff); +- A[4] ^= (unsigned char)((t >> 24) & 0xff); +- } +- memcpy(R, B + 8, 8); +- } +- } +- memcpy(out, A, 8); +- return inlen + 8; ++ return CRYPTO_128_wrap(key, iv, out, in, inlen, (block128_f)AES_encrypt); + } + + int AES_unwrap_key(AES_KEY *key, const unsigned char *iv, + unsigned char *out, + const unsigned char *in, unsigned int inlen) + { +- unsigned char *A, B[16], *R; +- unsigned int i, j, t; +- inlen -= 8; +- if (inlen & 0x7) +- return -1; +- if (inlen < 8) +- return -1; +- A = B; +- t = 6 * (inlen >> 3); +- memcpy(A, in, 8); +- memcpy(out, in + 8, inlen); +- for (j = 0; j < 6; j++) +- { +- R = out + inlen - 8; +- for (i = 0; i < inlen; i += 8, t--, R -= 8) +- { +- A[7] ^= (unsigned char)(t & 0xff); +- if (t > 0xff) +- { +- A[6] ^= (unsigned char)((t >> 8) & 0xff); +- A[5] ^= (unsigned char)((t >> 16) & 0xff); +- A[4] ^= (unsigned char)((t >> 24) & 0xff); +- } +- memcpy(B + 8, R, 8); +- AES_decrypt(B, B, key); +- memcpy(R, B + 8, 8); +- } +- } +- if (!iv) +- iv = default_iv; +- if (memcmp(A, iv, 8)) +- { +- OPENSSL_cleanse(out, inlen); +- return 0; +- } +- return inlen; +- } +- +-#ifdef AES_WRAP_TEST +- +-int AES_wrap_unwrap_test(const unsigned char *kek, int keybits, +- const unsigned char *iv, +- const unsigned char *eout, +- const unsigned char *key, int keylen) +- { +- unsigned char *otmp = NULL, *ptmp = NULL; +- int r, ret = 0; +- AES_KEY wctx; +- otmp = OPENSSL_malloc(keylen + 8); +- ptmp = OPENSSL_malloc(keylen); +- if (!otmp || !ptmp) +- return 0; +- if (AES_set_encrypt_key(kek, keybits, &wctx)) +- goto err; +- r = AES_wrap_key(&wctx, iv, otmp, key, keylen); +- if (r <= 0) +- goto err; +- +- if (eout && memcmp(eout, otmp, keylen)) +- goto err; +- +- if (AES_set_decrypt_key(kek, keybits, &wctx)) +- goto err; +- r = AES_unwrap_key(&wctx, iv, ptmp, otmp, r); +- +- if (memcmp(key, ptmp, keylen)) +- goto err; +- +- ret = 1; +- +- err: +- if (otmp) +- OPENSSL_free(otmp); +- if (ptmp) +- OPENSSL_free(ptmp); +- +- return ret; +- ++ return CRYPTO_128_unwrap(key, iv, out, in, inlen, (block128_f)AES_decrypt); + } +- +- +- +-int main(int argc, char **argv) +-{ +- +-static const unsigned char kek[] = { +- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, +- 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, +- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, +- 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f +-}; +- +-static const unsigned char key[] = { +- 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, +- 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, +- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, +- 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f +-}; +- +-static const unsigned char e1[] = { +- 0x1f, 0xa6, 0x8b, 0x0a, 0x81, 0x12, 0xb4, 0x47, +- 0xae, 0xf3, 0x4b, 0xd8, 0xfb, 0x5a, 0x7b, 0x82, +- 0x9d, 0x3e, 0x86, 0x23, 0x71, 0xd2, 0xcf, 0xe5 +-}; +- +-static const unsigned char e2[] = { +- 0x96, 0x77, 0x8b, 0x25, 0xae, 0x6c, 0xa4, 0x35, +- 0xf9, 0x2b, 0x5b, 0x97, 0xc0, 0x50, 0xae, 0xd2, +- 0x46, 0x8a, 0xb8, 0xa1, 0x7a, 0xd8, 0x4e, 0x5d +-}; +- +-static const unsigned char e3[] = { +- 0x64, 0xe8, 0xc3, 0xf9, 0xce, 0x0f, 0x5b, 0xa2, +- 0x63, 0xe9, 0x77, 0x79, 0x05, 0x81, 0x8a, 0x2a, +- 0x93, 0xc8, 0x19, 0x1e, 0x7d, 0x6e, 0x8a, 0xe7 +-}; +- +-static const unsigned char e4[] = { +- 0x03, 0x1d, 0x33, 0x26, 0x4e, 0x15, 0xd3, 0x32, +- 0x68, 0xf2, 
0x4e, 0xc2, 0x60, 0x74, 0x3e, 0xdc, +- 0xe1, 0xc6, 0xc7, 0xdd, 0xee, 0x72, 0x5a, 0x93, +- 0x6b, 0xa8, 0x14, 0x91, 0x5c, 0x67, 0x62, 0xd2 +-}; +- +-static const unsigned char e5[] = { +- 0xa8, 0xf9, 0xbc, 0x16, 0x12, 0xc6, 0x8b, 0x3f, +- 0xf6, 0xe6, 0xf4, 0xfb, 0xe3, 0x0e, 0x71, 0xe4, +- 0x76, 0x9c, 0x8b, 0x80, 0xa3, 0x2c, 0xb8, 0x95, +- 0x8c, 0xd5, 0xd1, 0x7d, 0x6b, 0x25, 0x4d, 0xa1 +-}; +- +-static const unsigned char e6[] = { +- 0x28, 0xc9, 0xf4, 0x04, 0xc4, 0xb8, 0x10, 0xf4, +- 0xcb, 0xcc, 0xb3, 0x5c, 0xfb, 0x87, 0xf8, 0x26, +- 0x3f, 0x57, 0x86, 0xe2, 0xd8, 0x0e, 0xd3, 0x26, +- 0xcb, 0xc7, 0xf0, 0xe7, 0x1a, 0x99, 0xf4, 0x3b, +- 0xfb, 0x98, 0x8b, 0x9b, 0x7a, 0x02, 0xdd, 0x21 +-}; +- +- AES_KEY wctx, xctx; +- int ret; +- ret = AES_wrap_unwrap_test(kek, 128, NULL, e1, key, 16); +- fprintf(stderr, "Key test result %d\n", ret); +- ret = AES_wrap_unwrap_test(kek, 192, NULL, e2, key, 16); +- fprintf(stderr, "Key test result %d\n", ret); +- ret = AES_wrap_unwrap_test(kek, 256, NULL, e3, key, 16); +- fprintf(stderr, "Key test result %d\n", ret); +- ret = AES_wrap_unwrap_test(kek, 192, NULL, e4, key, 24); +- fprintf(stderr, "Key test result %d\n", ret); +- ret = AES_wrap_unwrap_test(kek, 256, NULL, e5, key, 24); +- fprintf(stderr, "Key test result %d\n", ret); +- ret = AES_wrap_unwrap_test(kek, 256, NULL, e6, key, 32); +- fprintf(stderr, "Key test result %d\n", ret); +-} +- +- +-#endif +diff -up openssl-1.0.1e/crypto/evp/c_allc.c.wrap openssl-1.0.1e/crypto/evp/c_allc.c +--- openssl-1.0.1e/crypto/evp/c_allc.c.wrap 2014-09-09 16:11:24.103379348 +0200 ++++ openssl-1.0.1e/crypto/evp/c_allc.c 2014-09-09 16:12:25.853801601 +0200 +@@ -98,6 +98,7 @@ void OpenSSL_add_all_ciphers(void) + EVP_add_cipher(EVP_des_ecb()); + EVP_add_cipher(EVP_des_ede()); + EVP_add_cipher(EVP_des_ede3()); ++ EVP_add_cipher(EVP_des_ede3_wrap()); + #endif + + #ifndef OPENSSL_NO_RC4 +@@ -177,6 +178,8 @@ void OpenSSL_add_all_ciphers(void) + EVP_add_cipher(EVP_aes_128_ctr()); + EVP_add_cipher(EVP_aes_128_gcm()); + EVP_add_cipher(EVP_aes_128_xts()); ++ EVP_add_cipher(EVP_aes_128_wrap()); ++ EVP_add_cipher(EVP_aes_128_wrap_pad()); + EVP_add_cipher_alias(SN_aes_128_cbc,"AES128"); + EVP_add_cipher_alias(SN_aes_128_cbc,"aes128"); + EVP_add_cipher(EVP_aes_192_ecb()); +@@ -187,6 +190,8 @@ void OpenSSL_add_all_ciphers(void) + EVP_add_cipher(EVP_aes_192_ofb()); + EVP_add_cipher(EVP_aes_192_ctr()); + EVP_add_cipher(EVP_aes_192_gcm()); ++ EVP_add_cipher(EVP_aes_192_wrap()); ++ EVP_add_cipher(EVP_aes_192_wrap_pad()); + EVP_add_cipher_alias(SN_aes_192_cbc,"AES192"); + EVP_add_cipher_alias(SN_aes_192_cbc,"aes192"); + EVP_add_cipher(EVP_aes_256_ecb()); +@@ -198,6 +203,8 @@ void OpenSSL_add_all_ciphers(void) + EVP_add_cipher(EVP_aes_256_ctr()); + EVP_add_cipher(EVP_aes_256_gcm()); + EVP_add_cipher(EVP_aes_256_xts()); ++ EVP_add_cipher(EVP_aes_256_wrap()); ++ EVP_add_cipher(EVP_aes_256_wrap_pad()); + EVP_add_cipher_alias(SN_aes_256_cbc,"AES256"); + EVP_add_cipher_alias(SN_aes_256_cbc,"aes256"); + #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) +@@ -250,6 +257,7 @@ void OpenSSL_add_all_ciphers(void) + + EVP_add_cipher(EVP_des_ede()); + EVP_add_cipher(EVP_des_ede3()); ++ EVP_add_cipher(EVP_des_ede3_wrap()); + #endif + + #ifndef OPENSSL_NO_AES +@@ -262,6 +270,8 @@ void OpenSSL_add_all_ciphers(void) + EVP_add_cipher(EVP_aes_128_ctr()); + EVP_add_cipher(EVP_aes_128_gcm()); + EVP_add_cipher(EVP_aes_128_xts()); ++ EVP_add_cipher(EVP_aes_128_wrap()); ++ EVP_add_cipher(EVP_aes_128_wrap_pad()); + EVP_add_cipher_alias(SN_aes_128_cbc,"AES128"); + 
EVP_add_cipher_alias(SN_aes_128_cbc,"aes128"); + EVP_add_cipher(EVP_aes_192_ecb()); +@@ -272,6 +282,8 @@ void OpenSSL_add_all_ciphers(void) + EVP_add_cipher(EVP_aes_192_ofb()); + EVP_add_cipher(EVP_aes_192_ctr()); + EVP_add_cipher(EVP_aes_192_gcm()); ++ EVP_add_cipher(EVP_aes_192_wrap()); ++ EVP_add_cipher(EVP_aes_192_wrap_pad()); + EVP_add_cipher_alias(SN_aes_192_cbc,"AES192"); + EVP_add_cipher_alias(SN_aes_192_cbc,"aes192"); + EVP_add_cipher(EVP_aes_256_ecb()); +@@ -283,6 +295,8 @@ void OpenSSL_add_all_ciphers(void) + EVP_add_cipher(EVP_aes_256_ctr()); + EVP_add_cipher(EVP_aes_256_gcm()); + EVP_add_cipher(EVP_aes_256_xts()); ++ EVP_add_cipher(EVP_aes_256_wrap()); ++ EVP_add_cipher(EVP_aes_256_wrap_pad()); + EVP_add_cipher_alias(SN_aes_256_cbc,"AES256"); + EVP_add_cipher_alias(SN_aes_256_cbc,"aes256"); + #endif +diff -up openssl-1.0.1e/crypto/evp/e_aes.c.wrap openssl-1.0.1e/crypto/evp/e_aes.c +--- openssl-1.0.1e/crypto/evp/e_aes.c.wrap 2014-09-09 16:11:24.103379348 +0200 ++++ openssl-1.0.1e/crypto/evp/e_aes.c 2014-09-09 16:12:25.853801601 +0200 +@@ -1,5 +1,5 @@ + /* ==================================================================== +- * Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved. ++ * Copyright (c) 2001-2014 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions +@@ -1323,4 +1323,180 @@ BLOCK_CIPHER_custom(NID_aes,128,1,12,ccm + BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) + BLOCK_CIPHER_custom(NID_aes,256,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS) + ++typedef struct ++ { ++ union { double align; AES_KEY ks; } ks; ++ /* Indicates if IV has been set */ ++ unsigned char *iv; ++ } EVP_AES_WRAP_CTX; ++ ++static int aes_wrap_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, ++ const unsigned char *iv, int enc) ++ { ++ EVP_AES_WRAP_CTX *wctx = ctx->cipher_data; ++ if (!iv && !key) ++ return 1; ++ if (key) ++ { ++ if (ctx->encrypt) ++ AES_set_encrypt_key(key, ctx->key_len * 8, &wctx->ks.ks); ++ else ++ AES_set_decrypt_key(key, ctx->key_len * 8, &wctx->ks.ks); ++ if (!iv) ++ wctx->iv = NULL; ++ } ++ if (iv) ++ { ++ memcpy(ctx->iv, iv, EVP_CIPHER_CTX_iv_length(ctx)); ++ wctx->iv = ctx->iv; ++ } ++ return 1; ++ } ++ ++static int aes_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t inlen) ++ { ++ EVP_AES_WRAP_CTX *wctx = ctx->cipher_data; ++ size_t rv; ++ /* AES wrap with padding has IV length of 4, without padding 8 */ ++ int pad = EVP_CIPHER_CTX_iv_length(ctx) == 4; ++ /* No final operation so always return zero length */ ++ if (!in) ++ return 0; ++ /* Input length must always be non-zero */ ++ if (!inlen) ++ return -1; ++ /* If decrypting need at least 16 bytes and multiple of 8 */ ++ if (!ctx->encrypt && (inlen < 16 || inlen & 0x7)) ++ return -1; ++ /* If not padding input must be multiple of 8 */ ++ if (!pad && inlen & 0x7) ++ return -1; ++ if (!out) ++ { ++ if (ctx->encrypt) ++ { ++ /* If padding round up to multiple of 8 */ ++ if (pad) ++ inlen = (inlen + 7)/8 * 8; ++ /* 8 byte prefix */ ++ return inlen + 8; ++ } ++ else ++ { ++ /* If not padding output will be exactly 8 bytes ++ * smaller than input. If padding it will be at ++ * least 8 bytes smaller but we don't know how ++ * much. 
++ */ ++ return inlen - 8; ++ } ++ } ++ if (pad) ++ { ++ if (ctx->encrypt) ++ rv = CRYPTO_128_wrap_pad(&wctx->ks.ks, wctx->iv, ++ out, in, inlen, ++ (block128_f)AES_encrypt); ++ else ++ rv = CRYPTO_128_unwrap_pad(&wctx->ks.ks, wctx->iv, ++ out, in, inlen, ++ (block128_f)AES_decrypt); ++ } ++ else ++ { ++ if (ctx->encrypt) ++ rv = CRYPTO_128_wrap(&wctx->ks.ks, wctx->iv, ++ out, in, inlen, ++ (block128_f)AES_encrypt); ++ else ++ rv = CRYPTO_128_unwrap(&wctx->ks.ks, wctx->iv, ++ out, in, inlen, ++ (block128_f)AES_decrypt); ++ } ++ return rv ? (int)rv : -1; ++ } ++ ++#define WRAP_FLAGS (EVP_CIPH_WRAP_MODE | EVP_CIPH_FLAG_FIPS \ ++ | EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \ ++ | EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_FLAG_DEFAULT_ASN1) ++ ++static const EVP_CIPHER aes_128_wrap = { ++ NID_id_aes128_wrap, ++ 8, 16, 8, WRAP_FLAGS, ++ aes_wrap_init_key, aes_wrap_cipher, ++ NULL, ++ sizeof(EVP_AES_WRAP_CTX), ++ NULL,NULL,NULL,NULL }; ++ ++const EVP_CIPHER *EVP_aes_128_wrap(void) ++ { ++ return &aes_128_wrap; ++ } ++ ++static const EVP_CIPHER aes_192_wrap = { ++ NID_id_aes192_wrap, ++ 8, 24, 8, WRAP_FLAGS, ++ aes_wrap_init_key, aes_wrap_cipher, ++ NULL, ++ sizeof(EVP_AES_WRAP_CTX), ++ NULL,NULL,NULL,NULL }; ++ ++const EVP_CIPHER *EVP_aes_192_wrap(void) ++ { ++ return &aes_192_wrap; ++ } ++ ++static const EVP_CIPHER aes_256_wrap = { ++ NID_id_aes256_wrap, ++ 8, 32, 8, WRAP_FLAGS, ++ aes_wrap_init_key, aes_wrap_cipher, ++ NULL, ++ sizeof(EVP_AES_WRAP_CTX), ++ NULL,NULL,NULL,NULL }; ++ ++const EVP_CIPHER *EVP_aes_256_wrap(void) ++ { ++ return &aes_256_wrap; ++ } ++ ++static const EVP_CIPHER aes_128_wrap_pad = { ++ NID_id_aes128_wrap_pad, ++ 8, 16, 4, WRAP_FLAGS, ++ aes_wrap_init_key, aes_wrap_cipher, ++ NULL, ++ sizeof(EVP_AES_WRAP_CTX), ++ NULL,NULL,NULL,NULL }; ++ ++const EVP_CIPHER *EVP_aes_128_wrap_pad(void) ++ { ++ return &aes_128_wrap_pad; ++ } ++ ++static const EVP_CIPHER aes_192_wrap_pad = { ++ NID_id_aes192_wrap_pad, ++ 8, 24, 4, WRAP_FLAGS, ++ aes_wrap_init_key, aes_wrap_cipher, ++ NULL, ++ sizeof(EVP_AES_WRAP_CTX), ++ NULL,NULL,NULL,NULL }; ++ ++const EVP_CIPHER *EVP_aes_192_wrap_pad(void) ++ { ++ return &aes_192_wrap_pad; ++ } ++ ++static const EVP_CIPHER aes_256_wrap_pad = { ++ NID_id_aes256_wrap_pad, ++ 8, 32, 4, WRAP_FLAGS, ++ aes_wrap_init_key, aes_wrap_cipher, ++ NULL, ++ sizeof(EVP_AES_WRAP_CTX), ++ NULL,NULL,NULL,NULL }; ++ ++const EVP_CIPHER *EVP_aes_256_wrap_pad(void) ++ { ++ return &aes_256_wrap_pad; ++ } ++ + #endif +diff -up openssl-1.0.1e/crypto/evp/e_des3.c.wrap openssl-1.0.1e/crypto/evp/e_des3.c +--- openssl-1.0.1e/crypto/evp/e_des3.c.wrap 2014-09-09 16:11:24.104379372 +0200 ++++ openssl-1.0.1e/crypto/evp/e_des3.c 2014-09-09 16:12:25.854801627 +0200 +@@ -310,4 +310,112 @@ const EVP_CIPHER *EVP_des_ede3(void) + { + return &des_ede3_ecb; + } ++ ++#ifndef OPENSSL_NO_SHA ++ ++#include ++ ++static const unsigned char wrap_iv[8] = {0x4a,0xdd,0xa2,0x2c,0x79,0xe8,0x21,0x05}; ++ ++static int des_ede3_unwrap(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t inl) ++ { ++ unsigned char icv[8], iv[8], sha1tmp[SHA_DIGEST_LENGTH]; ++ int rv = -1; ++ if (inl < 24) ++ return -1; ++ if (!out) ++ return inl - 16; ++ memcpy(ctx->iv, wrap_iv, 8); ++ /* Decrypt first block which will end up as icv */ ++ des_ede_cbc_cipher(ctx, icv, in, 8); ++ /* Decrypt central blocks */ ++ /* If decrypting in place move whole output along a block ++ * so the next des_ede_cbc_cipher is in place. 
++ */ ++ if (out == in) ++ { ++ memmove(out, out + 8, inl - 8); ++ in -= 8; ++ } ++ des_ede_cbc_cipher(ctx, out, in + 8, inl - 16); ++ /* Decrypt final block which will be IV */ ++ des_ede_cbc_cipher(ctx, iv, in + inl - 8, 8); ++ /* Reverse order of everything */ ++ BUF_reverse(icv, NULL, 8); ++ BUF_reverse(out, NULL, inl - 16); ++ BUF_reverse(ctx->iv, iv, 8); ++ /* Decrypt again using new IV */ ++ des_ede_cbc_cipher(ctx, out, out, inl - 16); ++ des_ede_cbc_cipher(ctx, icv, icv, 8); ++ /* Work out SHA1 hash of first portion */ ++ SHA1(out, inl - 16, sha1tmp); ++ ++ if (!CRYPTO_memcmp(sha1tmp, icv, 8)) ++ rv = inl - 16; ++ OPENSSL_cleanse(icv, 8); ++ OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH); ++ OPENSSL_cleanse(iv, 8); ++ OPENSSL_cleanse(ctx->iv, 8); ++ if (rv == -1) ++ OPENSSL_cleanse(out, inl - 16); ++ ++ return rv; ++ } ++ ++static int des_ede3_wrap(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t inl) ++ { ++ unsigned char sha1tmp[SHA_DIGEST_LENGTH]; ++ if (!out) ++ return inl + 16; ++ /* Copy input to output buffer + 8 so we have space for IV */ ++ memmove(out + 8, in, inl); ++ /* Work out ICV */ ++ SHA1(in, inl, sha1tmp); ++ memcpy(out + inl + 8, sha1tmp, 8); ++ OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH); ++ /* Generate random IV */ ++ RAND_bytes(ctx->iv, 8); ++ memcpy(out, ctx->iv, 8); ++ /* Encrypt everything after IV in place */ ++ des_ede_cbc_cipher(ctx, out + 8, out + 8, inl + 8); ++ BUF_reverse(out, NULL, inl + 16); ++ memcpy(ctx->iv, wrap_iv, 8); ++ des_ede_cbc_cipher(ctx, out, out, inl + 16); ++ return inl + 16; ++ } ++ ++static int des_ede3_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t inl) ++ { ++ /* Sanity check input length: we typically only wrap keys ++ * so EVP_MAXCHUNK is more than will ever be needed. Also ++ * input length must be a multiple of 8 bits. 
++ */ ++ if (inl >= EVP_MAXCHUNK || inl % 8) ++ return -1; ++ if (ctx->encrypt) ++ return des_ede3_wrap(ctx, out, in, inl); ++ else ++ return des_ede3_unwrap(ctx, out, in, inl); ++ } ++ ++static const EVP_CIPHER des3_wrap = { ++ NID_id_smime_alg_CMS3DESwrap, ++ 8, 24, 0, ++ EVP_CIPH_WRAP_MODE|EVP_CIPH_CUSTOM_IV|EVP_CIPH_FLAG_CUSTOM_CIPHER ++ |EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_FIPS, ++ des_ede3_init_key, des_ede3_wrap_cipher, ++ NULL, ++ sizeof(DES_EDE_KEY), ++ NULL,NULL,NULL,NULL }; ++ ++ ++const EVP_CIPHER *EVP_des_ede3_wrap(void) ++ { ++ return &des3_wrap; ++ } ++ ++# endif + #endif +diff -up openssl-1.0.1e/crypto/evp/evp_enc.c.wrap openssl-1.0.1e/crypto/evp/evp_enc.c +--- openssl-1.0.1e/crypto/evp/evp_enc.c.wrap 2014-09-09 16:11:24.104379372 +0200 ++++ openssl-1.0.1e/crypto/evp/evp_enc.c 2014-09-09 16:12:25.854801627 +0200 +@@ -233,7 +233,8 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ct + ctx->cipher_data = NULL; + } + ctx->key_len = cipher->key_len; +- ctx->flags = 0; ++ /* Preserve wrap enable flag, zero everything else */ ++ ctx->flags &= EVP_CIPHER_CTX_FLAG_WRAP_ALLOW; + if(ctx->cipher->flags & EVP_CIPH_CTRL_INIT) + { + if(!EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_INIT, 0, NULL)) +@@ -256,6 +257,13 @@ skip_to_init: + || ctx->cipher->block_size == 8 + || ctx->cipher->block_size == 16); + ++ if(!(ctx->flags & EVP_CIPHER_CTX_FLAG_WRAP_ALLOW) ++ && EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_WRAP_MODE) ++ { ++ EVPerr(EVP_F_EVP_CIPHERINIT_EX, EVP_R_WRAP_MODE_NOT_ALLOWED); ++ return 0; ++ } ++ + if(!(EVP_CIPHER_CTX_flags(ctx) & EVP_CIPH_CUSTOM_IV)) { + switch(EVP_CIPHER_CTX_mode(ctx)) { + +diff -up openssl-1.0.1e/crypto/evp/evp_err.c.wrap openssl-1.0.1e/crypto/evp/evp_err.c +--- openssl-1.0.1e/crypto/evp/evp_err.c.wrap 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/crypto/evp/evp_err.c 2014-09-09 16:12:25.854801627 +0200 +@@ -1,6 +1,6 @@ + /* crypto/evp/evp_err.c */ + /* ==================================================================== +- * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. ++ * Copyright (c) 1999-2013 The OpenSSL Project. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions +@@ -220,6 +220,7 @@ static ERR_STRING_DATA EVP_str_reasons[] + {ERR_REASON(EVP_R_UNSUPPORTED_PRF) ,"unsupported prf"}, + {ERR_REASON(EVP_R_UNSUPPORTED_PRIVATE_KEY_ALGORITHM),"unsupported private key algorithm"}, + {ERR_REASON(EVP_R_UNSUPPORTED_SALT_TYPE) ,"unsupported salt type"}, ++{ERR_REASON(EVP_R_WRAP_MODE_NOT_ALLOWED) ,"wrap mode not allowed"}, + {ERR_REASON(EVP_R_WRONG_FINAL_BLOCK_LENGTH),"wrong final block length"}, + {ERR_REASON(EVP_R_WRONG_PUBLIC_KEY_TYPE) ,"wrong public key type"}, + {0,NULL} +diff -up openssl-1.0.1e/crypto/evp/evp.h.wrap openssl-1.0.1e/crypto/evp/evp.h +--- openssl-1.0.1e/crypto/evp/evp.h.wrap 2014-09-09 16:11:24.104379372 +0200 ++++ openssl-1.0.1e/crypto/evp/evp.h 2014-09-09 16:12:25.855801651 +0200 +@@ -336,6 +336,7 @@ struct evp_cipher_st + #define EVP_CIPH_GCM_MODE 0x6 + #define EVP_CIPH_CCM_MODE 0x7 + #define EVP_CIPH_XTS_MODE 0x10001 ++#define EVP_CIPH_WRAP_MODE 0x10002 + #define EVP_CIPH_MODE 0xF0007 + /* Set if variable length cipher */ + #define EVP_CIPH_VARIABLE_LENGTH 0x8 +@@ -367,6 +368,13 @@ struct evp_cipher_st + #define EVP_CIPH_FLAG_CUSTOM_CIPHER 0x100000 + #define EVP_CIPH_FLAG_AEAD_CIPHER 0x200000 + ++/* Cipher context flag to indicate we can handle ++ * wrap mode: if allowed in older applications it could ++ * overflow buffers. ++ */ ++ ++#define EVP_CIPHER_CTX_FLAG_WRAP_ALLOW 0x1 ++ + /* ctrl() values */ + + #define EVP_CTRL_INIT 0x0 +@@ -729,6 +737,7 @@ const EVP_CIPHER *EVP_des_cbc(void); + const EVP_CIPHER *EVP_des_ede_cbc(void); + const EVP_CIPHER *EVP_des_ede3_cbc(void); + const EVP_CIPHER *EVP_desx_cbc(void); ++const EVP_CIPHER *EVP_des_ede3_wrap(void); + /* This should now be supported through the dev_crypto ENGINE. But also, why are + * rc4 and md5 declarations made here inside a "NO_DES" precompiler branch? 
*/ + #if 0 +@@ -788,6 +797,8 @@ const EVP_CIPHER *EVP_aes_128_ctr(void); + const EVP_CIPHER *EVP_aes_128_ccm(void); + const EVP_CIPHER *EVP_aes_128_gcm(void); + const EVP_CIPHER *EVP_aes_128_xts(void); ++const EVP_CIPHER *EVP_aes_128_wrap(void); ++const EVP_CIPHER *EVP_aes_128_wrap_pad(void); + const EVP_CIPHER *EVP_aes_192_ecb(void); + const EVP_CIPHER *EVP_aes_192_cbc(void); + const EVP_CIPHER *EVP_aes_192_cfb1(void); +@@ -798,6 +809,8 @@ const EVP_CIPHER *EVP_aes_192_ofb(void); + const EVP_CIPHER *EVP_aes_192_ctr(void); + const EVP_CIPHER *EVP_aes_192_ccm(void); + const EVP_CIPHER *EVP_aes_192_gcm(void); ++const EVP_CIPHER *EVP_aes_192_wrap(void); ++const EVP_CIPHER *EVP_aes_192_wrap_pad(void); + const EVP_CIPHER *EVP_aes_256_ecb(void); + const EVP_CIPHER *EVP_aes_256_cbc(void); + const EVP_CIPHER *EVP_aes_256_cfb1(void); +@@ -809,6 +822,8 @@ const EVP_CIPHER *EVP_aes_256_ctr(void); + const EVP_CIPHER *EVP_aes_256_ccm(void); + const EVP_CIPHER *EVP_aes_256_gcm(void); + const EVP_CIPHER *EVP_aes_256_xts(void); ++const EVP_CIPHER *EVP_aes_256_wrap(void); ++const EVP_CIPHER *EVP_aes_256_wrap_pad(void); + #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1) + const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void); + const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void); +@@ -1397,6 +1412,7 @@ void ERR_load_EVP_strings(void); + #define EVP_R_UNSUPPORTED_PRF 125 + #define EVP_R_UNSUPPORTED_PRIVATE_KEY_ALGORITHM 118 + #define EVP_R_UNSUPPORTED_SALT_TYPE 126 ++#define EVP_R_WRAP_MODE_NOT_ALLOWED 170 + #define EVP_R_WRONG_FINAL_BLOCK_LENGTH 109 + #define EVP_R_WRONG_PUBLIC_KEY_TYPE 110 + +diff -up openssl-1.0.1e/crypto/evp/evp_lib.c.wrap openssl-1.0.1e/crypto/evp/evp_lib.c +--- openssl-1.0.1e/crypto/evp/evp_lib.c.wrap 2014-09-09 16:11:24.104379372 +0200 ++++ openssl-1.0.1e/crypto/evp/evp_lib.c 2014-09-09 16:12:25.855801651 +0200 +@@ -68,7 +68,15 @@ int EVP_CIPHER_param_to_asn1(EVP_CIPHER_ + if (c->cipher->set_asn1_parameters != NULL) + ret=c->cipher->set_asn1_parameters(c,type); + else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) +- ret=EVP_CIPHER_set_asn1_iv(c, type); ++ { ++ if (EVP_CIPHER_CTX_mode(c) == EVP_CIPH_WRAP_MODE) ++ { ++ ASN1_TYPE_set(type, V_ASN1_NULL, NULL); ++ ret = 1; ++ } ++ else ++ ret=EVP_CIPHER_set_asn1_iv(c, type); ++ } + else + ret=-1; + return(ret); +@@ -81,7 +89,11 @@ int EVP_CIPHER_asn1_to_param(EVP_CIPHER_ + if (c->cipher->get_asn1_parameters != NULL) + ret=c->cipher->get_asn1_parameters(c,type); + else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1) ++ { ++ if (EVP_CIPHER_CTX_mode(c) == EVP_CIPH_WRAP_MODE) ++ return 1; + ret=EVP_CIPHER_get_asn1_iv(c, type); ++ } + else + ret=-1; + return(ret); +diff -up openssl-1.0.1e/crypto/evp/evp_test.c.wrap openssl-1.0.1e/crypto/evp/evp_test.c +--- openssl-1.0.1e/crypto/evp/evp_test.c.wrap 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/crypto/evp/evp_test.c 2014-09-09 16:12:25.856801673 +0200 +@@ -141,7 +141,7 @@ static void test1(const EVP_CIPHER *c,co + { + EVP_CIPHER_CTX ctx; + unsigned char out[4096]; +- int outl,outl2; ++ int outl,outl2,mode; + + printf("Testing cipher %s%s\n",EVP_CIPHER_name(c), + (encdec == 1 ? "(encrypt)" : (encdec == 0 ? 
"(decrypt)" : "(encrypt/decrypt)"))); +@@ -151,6 +151,7 @@ static void test1(const EVP_CIPHER *c,co + hexdump(stdout,"Plaintext",plaintext,pn); + hexdump(stdout,"Ciphertext",ciphertext,cn); + ++ mode = EVP_CIPHER_mode(c); + if(kn != c->key_len) + { + fprintf(stderr,"Key length doesn't match, got %d expected %lu\n",kn, +@@ -158,9 +159,19 @@ static void test1(const EVP_CIPHER *c,co + test1_exit(5); + } + EVP_CIPHER_CTX_init(&ctx); ++ EVP_CIPHER_CTX_set_flags(&ctx,EVP_CIPHER_CTX_FLAG_WRAP_ALLOW); + if (encdec != 0) + { +- if(!EVP_EncryptInit_ex(&ctx,c,NULL,key,iv)) ++ if (mode == EVP_CIPH_WRAP_MODE) ++ { ++ if(!EVP_EncryptInit_ex(&ctx,c,NULL,key,in ? iv : NULL)) ++ { ++ fprintf(stderr,"EncryptInit failed\n"); ++ ERR_print_errors_fp(stderr); ++ test1_exit(10); ++ } ++ } ++ else if(!EVP_EncryptInit_ex(&ctx,c,NULL,key,iv)) + { + fprintf(stderr,"EncryptInit failed\n"); + ERR_print_errors_fp(stderr); +@@ -199,7 +210,16 @@ static void test1(const EVP_CIPHER *c,co + + if (encdec <= 0) + { +- if(!EVP_DecryptInit_ex(&ctx,c,NULL,key,iv)) ++ if (mode == EVP_CIPH_WRAP_MODE) ++ { ++ if(!EVP_DecryptInit_ex(&ctx,c,NULL,key,in ? iv : NULL)) ++ { ++ fprintf(stderr,"EncryptInit failed\n"); ++ ERR_print_errors_fp(stderr); ++ test1_exit(10); ++ } ++ } ++ else if(!EVP_DecryptInit_ex(&ctx,c,NULL,key,iv)) + { + fprintf(stderr,"DecryptInit failed\n"); + ERR_print_errors_fp(stderr); +@@ -339,7 +359,7 @@ int main(int argc,char **argv) + perror(szTestFile); + EXIT(2); + } +- ++ ERR_load_crypto_strings(); + /* Load up the software EVP_CIPHER and EVP_MD definitions */ + OpenSSL_add_all_ciphers(); + OpenSSL_add_all_digests(); +diff -up openssl-1.0.1e/crypto/evp/evptests.txt.wrap openssl-1.0.1e/crypto/evp/evptests.txt +--- openssl-1.0.1e/crypto/evp/evptests.txt.wrap 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/crypto/evp/evptests.txt 2014-09-09 16:12:25.856801673 +0200 +@@ -332,3 +332,15 @@ SEED-ECB:0000000000000000000000000000000 + SEED-ECB:000102030405060708090A0B0C0D0E0F::00000000000000000000000000000000:C11F22F20140505084483597E4370F43:1 + SEED-ECB:4706480851E61BE85D74BFB3FD956185::83A2F8A288641FB9A4E9A5CC2F131C7D:EE54D13EBCAE706D226BC3142CD40D4A:1 + SEED-ECB:28DBC3BC49FFD87DCFA509B11D422BE7::B41E6BE2EBA84A148E2EED84593C5EC7:9B9B7BFCD1813CB95D0B3618F40F5122:1 ++ ++# AES wrap tests from RFC3394 ++id-aes128-wrap:000102030405060708090A0B0C0D0E0F::00112233445566778899AABBCCDDEEFF:1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5 ++id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF:96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D ++id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF:64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7 ++id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF0001020304050607:031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2 ++id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF0001020304050607:A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1 ++id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21 ++# AES wrap tests from RFC5649 
++id-aes192-wrap-pad:5840df6e29b02af1ab493b705bf16ea1ae8338f4dcc176a8::c37b7e6492584340bed12207808941155068f738:138bdeaa9b8fa7fc61f97742e72248ee5ae6ae5360d1ae6a5f54f373fa543b6a ++id-aes192-wrap-pad:5840df6e29b02af1ab493b705bf16ea1ae8338f4dcc176a8::466f7250617369:afbeb0f07dfbf5419200f2ccb50bb24f ++ +diff -up openssl-1.0.1e/crypto/modes/Makefile.wrap openssl-1.0.1e/crypto/modes/Makefile +--- openssl-1.0.1e/crypto/modes/Makefile.wrap 2014-09-09 16:11:24.079378796 +0200 ++++ openssl-1.0.1e/crypto/modes/Makefile 2014-09-09 16:12:25.856801673 +0200 +@@ -22,9 +22,9 @@ APPS= + + LIB=$(TOP)/libcrypto.a + LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \ +- ccm128.c xts128.c ++ ccm128.c xts128.c wrap128.c + LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o gcm128.o \ +- ccm128.o xts128.o $(MODES_ASM_OBJ) ++ ccm128.o xts128.o wrap128.o $(MODES_ASM_OBJ) + + SRC= $(LIBSRC) + +diff -up openssl-1.0.1e/crypto/modes/modes.h.wrap openssl-1.0.1e/crypto/modes/modes.h +--- openssl-1.0.1e/crypto/modes/modes.h.wrap 2014-09-09 16:11:23.726370665 +0200 ++++ openssl-1.0.1e/crypto/modes/modes.h 2014-09-09 16:12:25.857801695 +0200 +@@ -133,3 +133,17 @@ typedef struct xts128_context XTS128_CON + + int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16], + const unsigned char *inp, unsigned char *out, size_t len, int enc); ++ ++size_t CRYPTO_128_wrap(void *key, const unsigned char *iv, ++ unsigned char *out, ++ const unsigned char *in, size_t inlen, block128_f block); ++ ++size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv, ++ unsigned char *out, ++ const unsigned char *in, size_t inlen, block128_f block); ++size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv, ++ unsigned char *out, ++ const unsigned char *in, size_t inlen, block128_f block); ++size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv, ++ unsigned char *out, ++ const unsigned char *in, size_t inlen, block128_f block); +diff -up openssl-1.0.1e/crypto/modes/wrap128.c.wrap openssl-1.0.1e/crypto/modes/wrap128.c +--- openssl-1.0.1e/crypto/modes/wrap128.c.wrap 2014-09-09 16:12:25.857801695 +0200 ++++ openssl-1.0.1e/crypto/modes/wrap128.c 2014-09-09 16:12:25.857801695 +0200 +@@ -0,0 +1,372 @@ ++/* crypto/modes/wrap128.c */ ++/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL ++ * project. ++ * Mode with padding contributed by Petr Spacek (pspacek@redhat.com). ++ */ ++/* ==================================================================== ++ * Copyright (c) 2013 The OpenSSL Project. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in ++ * the documentation and/or other materials provided with the ++ * distribution. ++ * ++ * 3. All advertising materials mentioning features or use of this ++ * software must display the following acknowledgment: ++ * "This product includes software developed by the OpenSSL Project ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" ++ * ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to ++ * endorse or promote products derived from this software without ++ * prior written permission. 
For written permission, please contact ++ * licensing@OpenSSL.org. ++ * ++ * 5. Products derived from this software may not be called "OpenSSL" ++ * nor may "OpenSSL" appear in their names without prior written ++ * permission of the OpenSSL Project. ++ * ++ * 6. Redistributions of any form whatsoever must retain the following ++ * acknowledgment: ++ * "This product includes software developed by the OpenSSL Project ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ==================================================================== ++ */ ++ ++/** Beware! ++ * ++ * Following wrapping modes were designed for AES but this implementation ++ * allows you to use them for any 128 bit block cipher. ++ */ ++ ++#include "cryptlib.h" ++#include ++ ++/** RFC 3394 section 2.2.3.1 Default Initial Value */ ++static const unsigned char default_iv[] = { ++ 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, ++}; ++ ++/** RFC 5649 section 3 Alternative Initial Value 32-bit constant */ ++static const unsigned char default_aiv[] = { ++ 0xA6, 0x59, 0x59, 0xA6 ++}; ++ ++/** Input size limit: lower than maximum of standards but far larger than ++ * anything that will be used in practice. ++ */ ++#define CRYPTO128_WRAP_MAX (1UL << 31) ++ ++/** Wrapping according to RFC 3394 section 2.2.1. ++ * ++ * @param[in] key Key value. ++ * @param[in] iv IV value. Length = 8 bytes. NULL = use default_iv. ++ * @param[in] in Plain text as n 64-bit blocks, n >= 2. ++ * @param[in] inlen Length of in. ++ * @param[out] out Cipher text. Minimal buffer length = (inlen + 8) bytes. ++ * Input and output buffers can overlap if block function ++ * supports that. ++ * @param[in] block Block processing function. ++ * @return 0 if inlen does not consist of n 64-bit blocks, n >= 2. ++ * or if inlen > CRYPTO128_WRAP_MAX. ++ * Output length if wrapping succeeded. 
++ */ ++size_t CRYPTO_128_wrap(void *key, const unsigned char *iv, ++ unsigned char *out, ++ const unsigned char *in, size_t inlen, block128_f block) ++ { ++ unsigned char *A, B[16], *R; ++ size_t i, j, t; ++ if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX)) ++ return 0; ++ A = B; ++ t = 1; ++ memmove(out + 8, in, inlen); ++ if (!iv) ++ iv = default_iv; ++ ++ memcpy(A, iv, 8); ++ ++ for (j = 0; j < 6; j++) ++ { ++ R = out + 8; ++ for (i = 0; i < inlen; i += 8, t++, R += 8) ++ { ++ memcpy(B + 8, R, 8); ++ block(B, B, key); ++ A[7] ^= (unsigned char)(t & 0xff); ++ if (t > 0xff) ++ { ++ A[6] ^= (unsigned char)((t >> 8) & 0xff); ++ A[5] ^= (unsigned char)((t >> 16) & 0xff); ++ A[4] ^= (unsigned char)((t >> 24) & 0xff); ++ } ++ memcpy(R, B + 8, 8); ++ } ++ } ++ memcpy(out, A, 8); ++ return inlen + 8; ++ } ++ ++ ++/** Unwrapping according to RFC 3394 section 2.2.2 steps 1-2. ++ * IV check (step 3) is responsibility of the caller. ++ * ++ * @param[in] key Key value. ++ * @param[out] iv Unchecked IV value. Minimal buffer length = 8 bytes. ++ * @param[out] out Plain text without IV. ++ * Minimal buffer length = (inlen - 8) bytes. ++ * Input and output buffers can overlap if block function ++ * supports that. ++ * @param[in] in Ciphertext text as n 64-bit blocks ++ * @param[in] inlen Length of in. ++ * @param[in] block Block processing function. ++ * @return 0 if inlen is out of range [24, CRYPTO128_WRAP_MAX] ++ * or if inlen is not multiply of 8. ++ * Output length otherwise. ++ */ ++static size_t crypto_128_unwrap_raw(void *key, unsigned char *iv, ++ unsigned char *out, const unsigned char *in, ++ size_t inlen, block128_f block) ++ { ++ unsigned char *A, B[16], *R; ++ size_t i, j, t; ++ inlen -= 8; ++ if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX)) ++ return 0; ++ A = B; ++ t = 6 * (inlen >> 3); ++ memcpy(A, in, 8); ++ memmove(out, in + 8, inlen); ++ for (j = 0; j < 6; j++) ++ { ++ R = out + inlen - 8; ++ for (i = 0; i < inlen; i += 8, t--, R -= 8) ++ { ++ A[7] ^= (unsigned char)(t & 0xff); ++ if (t > 0xff) ++ { ++ A[6] ^= (unsigned char)((t >> 8) & 0xff); ++ A[5] ^= (unsigned char)((t >> 16) & 0xff); ++ A[4] ^= (unsigned char)((t >> 24) & 0xff); ++ } ++ memcpy(B + 8, R, 8); ++ block(B, B, key); ++ memcpy(R, B + 8, 8); ++ } ++ } ++ memcpy(iv, A, 8); ++ return inlen; ++ } ++ ++/** Unwrapping according to RFC 3394 section 2.2.2 including IV check. ++ * First block of plain text have to match supplied IV otherwise an error is ++ * returned. ++ * ++ * @param[in] key Key value. ++ * @param[out] iv Unchecked IV value. Minimal buffer length = 8 bytes. ++ * @param[out] out Plain text without IV. ++ * Minimal buffer length = (inlen - 8) bytes. ++ * Input and output buffers can overlap if block function ++ * supports that. ++ * @param[in] in Ciphertext text as n 64-bit blocks ++ * @param[in] inlen Length of in. ++ * @param[in] block Block processing function. ++ * @return 0 if inlen is out of range [24, CRYPTO128_WRAP_MAX] ++ * or if inlen is not multiply of 8 ++ * or if IV doesn't match expected value. ++ * Output length otherwise. 
++ */ ++size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv, ++ unsigned char *out, const unsigned char *in, size_t inlen, ++ block128_f block) ++ { ++ size_t ret; ++ unsigned char got_iv[8]; ++ ++ ret = crypto_128_unwrap_raw(key, got_iv, out, in, inlen, block); ++ if (ret != inlen) ++ return ret; ++ ++ if (!iv) ++ iv = default_iv; ++ if (CRYPTO_memcmp(out, iv, 8)) ++ { ++ OPENSSL_cleanse(out, inlen); ++ return 0; ++ } ++ return inlen; ++ } ++ ++/** Wrapping according to RFC 5649 section 4.1. ++ * ++ * @param[in] key Key value. ++ * @param[in] icv (Non-standard) IV, 4 bytes. NULL = use default_aiv. ++ * @param[out] out Cipher text. Minimal buffer length = (inlen + 15) bytes. ++ * Input and output buffers can overlap if block function ++ * supports that. ++ * @param[in] in Plain text as n 64-bit blocks, n >= 2. ++ * @param[in] inlen Length of in. ++ * @param[in] block Block processing function. ++ * @return 0 if inlen is out of range [1, CRYPTO128_WRAP_MAX]. ++ * Output length if wrapping succeeded. ++ */ ++size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv, ++ unsigned char *out, ++ const unsigned char *in, size_t inlen, block128_f block) ++ { ++ /* n: number of 64-bit blocks in the padded key data */ ++ const size_t blocks_padded = (inlen + 8) / 8; ++ const size_t padded_len = blocks_padded * 8; ++ const size_t padding_len = padded_len - inlen; ++ /* RFC 5649 section 3: Alternative Initial Value */ ++ unsigned char aiv[8]; ++ int ret; ++ ++ /* Section 1: use 32-bit fixed field for plaintext octet length */ ++ if (inlen == 0 || inlen >= CRYPTO128_WRAP_MAX) ++ return 0; ++ ++ /* Section 3: Alternative Initial Value */ ++ if (!icv) ++ memcpy(aiv, default_aiv, 4); ++ else ++ memcpy(aiv, icv, 4); /* Standard doesn't mention this. */ ++ ++ aiv[4] = (inlen >> 24) & 0xFF; ++ aiv[5] = (inlen >> 16) & 0xFF; ++ aiv[6] = (inlen >> 8) & 0xFF; ++ aiv[7] = inlen & 0xFF; ++ ++ if (padded_len == 8) ++ { ++ /* Section 4.1 - special case in step 2: ++ * If the padded plaintext contains exactly eight octets, then ++ * prepend the AIV and encrypt the resulting 128-bit block ++ * using AES in ECB mode. */ ++ memmove(out + 8, in, inlen); ++ memcpy(out, aiv, 8); ++ memset(out + 8 + inlen, 0, padding_len); ++ block(out, out, key); ++ ret = 16; /* AIV + padded input */ ++ } ++ else ++ { ++ memmove(out, in, inlen); ++ memset(out + inlen, 0, padding_len); /* Section 4.1 step 1 */ ++ ret = CRYPTO_128_wrap(key, aiv, out, out, padded_len, block); ++ } ++ ++ return ret; ++ } ++ ++/** Unwrapping according to RFC 5649 section 4.2. ++ * ++ * @param[in] key Key value. ++ * @param[in] icv (Non-standard) IV, 4 bytes. NULL = use default_aiv. ++ * @param[out] out Plain text. Minimal buffer length = inlen bytes. ++ * Input and output buffers can overlap if block function ++ * supports that. ++ * @param[in] in Ciphertext text as n 64-bit blocks ++ * @param[in] inlen Length of in. ++ * @param[in] block Block processing function. ++ * @return 0 if inlen is out of range [16, CRYPTO128_WRAP_MAX], ++ * or if inlen is not multiply of 8 ++ * or if IV and message length indicator doesn't match. ++ * Output length if unwrapping succeeded and IV matches. 
++ */ ++size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv, ++ unsigned char *out, ++ const unsigned char *in, size_t inlen, block128_f block) ++ { ++ /* n: number of 64-bit blocks in the padded key data */ ++ size_t n = inlen / 8 - 1; ++ size_t padded_len; ++ size_t padding_len; ++ size_t ptext_len; ++ /* RFC 5649 section 3: Alternative Initial Value */ ++ unsigned char aiv[8]; ++ static unsigned char zeros[8] = {0x0}; ++ size_t ret; ++ ++ /* Section 4.2: Cipher text length has to be (n+1) 64-bit blocks. */ ++ if ((inlen & 0x7) != 0 || inlen < 16 || inlen >= CRYPTO128_WRAP_MAX) ++ return 0; ++ ++ memmove(out, in, inlen); ++ if (inlen == 16) ++ { ++ /* Section 4.2 - special case in step 1: ++ * When n=1, the ciphertext contains exactly two 64-bit ++ * blocks and they are decrypted as a single AES ++ * block using AES in ECB mode: ++ * AIV | P[1] = DEC(K, C[0] | C[1]) ++ */ ++ block(out, out, key); ++ memcpy(aiv, out, 8); ++ /* Remove AIV */ ++ memmove(out, out + 8, 8); ++ padded_len = 8; ++ } ++ else ++ { ++ padded_len = inlen - 8; ++ ret = crypto_128_unwrap_raw(key, aiv, out, out, inlen, block); ++ if (padded_len != ret) ++ { ++ OPENSSL_cleanse(out, inlen); ++ return 0; ++ } ++ } ++ ++ /* Section 3: AIV checks: Check that MSB(32,A) = A65959A6. ++ * Optionally a user-supplied value can be used ++ * (even if standard doesn't mention this). */ ++ if ((!icv && CRYPTO_memcmp(aiv, default_aiv, 4)) ++ || (icv && CRYPTO_memcmp(aiv, icv, 4))) ++ { ++ OPENSSL_cleanse(out, inlen); ++ return 0; ++ } ++ ++ /* Check that 8*(n-1) < LSB(32,AIV) <= 8*n. ++ * If so, let ptext_len = LSB(32,AIV). */ ++ ++ ptext_len = (aiv[4] << 24) | (aiv[5] << 16) | (aiv[6] << 8) | aiv[7]; ++ if (8*(n-1) >= ptext_len || ptext_len > 8*n) ++ { ++ OPENSSL_cleanse(out, inlen); ++ return 0; ++ } ++ ++ /* Check that the rightmost padding_len octets of the output data ++ * are zero. 
*/ ++ padding_len = padded_len - ptext_len; ++ if (CRYPTO_memcmp(out + ptext_len, zeros, padding_len) != 0) ++ { ++ OPENSSL_cleanse(out, inlen); ++ return 0; ++ } ++ ++ /* Section 4.2 step 3: Remove padding */ ++ return ptext_len; ++ } diff --git a/SOURCES/openssl-1.0.1e-fallback-scsv.patch b/SOURCES/openssl-1.0.1e-fallback-scsv.patch index 0e28c00..0c307c3 100644 --- a/SOURCES/openssl-1.0.1e-fallback-scsv.patch +++ b/SOURCES/openssl-1.0.1e-fallback-scsv.patch @@ -78,8 +78,8 @@ diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_set_mode.pod.fallback-scsv openssl-1.0.1 =head1 RETURN VALUES diff -up openssl-1.0.1e/ssl/dtls1.h.fallback-scsv openssl-1.0.1e/ssl/dtls1.h ---- openssl-1.0.1e/ssl/dtls1.h.fallback-scsv 2014-10-15 14:39:30.862907615 +0200 -+++ openssl-1.0.1e/ssl/dtls1.h 2014-10-15 14:39:30.973910121 +0200 +--- openssl-1.0.1e/ssl/dtls1.h.fallback-scsv 2014-10-15 14:45:25.492913542 +0200 ++++ openssl-1.0.1e/ssl/dtls1.h 2014-10-15 14:45:25.596915890 +0200 @@ -84,6 +84,8 @@ extern "C" { #endif @@ -95,8 +95,8 @@ diff -up openssl-1.0.1e/ssl/dtls1.h.fallback-scsv openssl-1.0.1e/ssl/dtls1.h #endif - diff -up openssl-1.0.1e/ssl/d1_lib.c.fallback-scsv openssl-1.0.1e/ssl/d1_lib.c ---- openssl-1.0.1e/ssl/d1_lib.c.fallback-scsv 2014-10-15 14:39:30.911908721 +0200 -+++ openssl-1.0.1e/ssl/d1_lib.c 2014-10-15 14:39:30.973910121 +0200 +--- openssl-1.0.1e/ssl/d1_lib.c.fallback-scsv 2014-10-15 14:45:25.539914603 +0200 ++++ openssl-1.0.1e/ssl/d1_lib.c 2014-10-15 14:45:25.596915890 +0200 @@ -263,6 +263,16 @@ long dtls1_ctrl(SSL *s, int cmd, long la case DTLS_CTRL_LISTEN: ret = dtls1_listen(s, parg); @@ -116,7 +116,7 @@ diff -up openssl-1.0.1e/ssl/d1_lib.c.fallback-scsv openssl-1.0.1e/ssl/d1_lib.c ret = ssl3_ctrl(s, cmd, larg, parg); diff -up openssl-1.0.1e/ssl/ssl_err.c.fallback-scsv openssl-1.0.1e/ssl/ssl_err.c --- openssl-1.0.1e/ssl/ssl_err.c.fallback-scsv 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/ssl_err.c 2014-10-15 14:39:30.973910121 +0200 ++++ openssl-1.0.1e/ssl/ssl_err.c 2014-10-15 14:45:25.596915890 +0200 @@ -382,6 +382,7 @@ static ERR_STRING_DATA SSL_str_reasons[] {ERR_REASON(SSL_R_HTTPS_PROXY_REQUEST) ,"https proxy request"}, {ERR_REASON(SSL_R_HTTP_REQUEST) ,"http request"}, @@ -134,8 +134,8 @@ diff -up openssl-1.0.1e/ssl/ssl_err.c.fallback-scsv openssl-1.0.1e/ssl/ssl_err.c {ERR_REASON(SSL_R_TLSV1_ALERT_INTERNAL_ERROR),"tlsv1 alert internal error"}, {ERR_REASON(SSL_R_TLSV1_ALERT_NO_RENEGOTIATION),"tlsv1 alert no renegotiation"}, diff -up openssl-1.0.1e/ssl/ssl.h.fallback-scsv openssl-1.0.1e/ssl/ssl.h ---- openssl-1.0.1e/ssl/ssl.h.fallback-scsv 2014-10-15 14:39:30.940909375 +0200 -+++ openssl-1.0.1e/ssl/ssl.h 2014-10-15 14:41:46.174962343 +0200 +--- openssl-1.0.1e/ssl/ssl.h.fallback-scsv 2014-10-15 14:45:25.588915709 +0200 ++++ openssl-1.0.1e/ssl/ssl.h 2014-10-15 14:47:04.423146935 +0200 @@ -638,6 +638,10 @@ struct ssl_session_st * TLS only.) "Released" buffers are put onto a free-list in the context * or just freed (depending on the context's setting for freelist_max_len). 
*/ @@ -155,8 +155,8 @@ diff -up openssl-1.0.1e/ssl/ssl.h.fallback-scsv openssl-1.0.1e/ssl/ssl.h #define SSL_ERROR_NONE 0 #define SSL_ERROR_SSL 1 -@@ -1565,6 +1570,8 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) - +@@ -1566,6 +1571,8 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION) + #define SSL_CTRL_SET_ECDH_AUTO 94 #define SSL_CTRL_GET_SERVER_TMP_KEY 109 +#define SSL_CTRL_CHECK_PROTO_VERSION 119 @@ -164,7 +164,7 @@ diff -up openssl-1.0.1e/ssl/ssl.h.fallback-scsv openssl-1.0.1e/ssl/ssl.h #define DTLSv1_get_timeout(ssl, arg) \ SSL_ctrl(ssl,DTLS_CTRL_GET_TIMEOUT,0, (void *)arg) #define DTLSv1_handle_timeout(ssl) \ -@@ -2298,6 +2305,7 @@ void ERR_load_SSL_strings(void); +@@ -2304,6 +2311,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_HTTPS_PROXY_REQUEST 155 #define SSL_R_HTTP_REQUEST 156 #define SSL_R_ILLEGAL_PADDING 283 @@ -172,7 +172,7 @@ diff -up openssl-1.0.1e/ssl/ssl.h.fallback-scsv openssl-1.0.1e/ssl/ssl.h #define SSL_R_INCONSISTENT_COMPRESSION 340 #define SSL_R_INVALID_CHALLENGE_LENGTH 158 #define SSL_R_INVALID_COMMAND 280 -@@ -2444,6 +2452,7 @@ void ERR_load_SSL_strings(void); +@@ -2450,6 +2458,7 @@ void ERR_load_SSL_strings(void); #define SSL_R_TLSV1_ALERT_DECRYPTION_FAILED 1021 #define SSL_R_TLSV1_ALERT_DECRYPT_ERROR 1051 #define SSL_R_TLSV1_ALERT_EXPORT_RESTRICTION 1060 @@ -181,8 +181,8 @@ diff -up openssl-1.0.1e/ssl/ssl.h.fallback-scsv openssl-1.0.1e/ssl/ssl.h #define SSL_R_TLSV1_ALERT_INTERNAL_ERROR 1080 #define SSL_R_TLSV1_ALERT_NO_RENEGOTIATION 1100 diff -up openssl-1.0.1e/ssl/ssl_lib.c.fallback-scsv openssl-1.0.1e/ssl/ssl_lib.c ---- openssl-1.0.1e/ssl/ssl_lib.c.fallback-scsv 2014-10-15 14:39:30.912908743 +0200 -+++ openssl-1.0.1e/ssl/ssl_lib.c 2014-10-15 14:39:30.975910166 +0200 +--- openssl-1.0.1e/ssl/ssl_lib.c.fallback-scsv 2014-10-15 14:45:25.589915731 +0200 ++++ openssl-1.0.1e/ssl/ssl_lib.c 2014-10-15 14:45:25.597915912 +0200 @@ -1383,6 +1383,8 @@ int ssl_cipher_list_to_bytes(SSL *s,STAC if (sk == NULL) return(0); @@ -289,8 +289,8 @@ diff -up openssl-1.0.1e/ssl/ssl_lib.c.fallback-scsv openssl-1.0.1e/ssl/ssl_lib.c p+=n; if (c != NULL) diff -up openssl-1.0.1e/ssl/ssl3.h.fallback-scsv openssl-1.0.1e/ssl/ssl3.h ---- openssl-1.0.1e/ssl/ssl3.h.fallback-scsv 2014-10-15 14:39:30.949909579 +0200 -+++ openssl-1.0.1e/ssl/ssl3.h 2014-10-15 14:39:30.975910166 +0200 +--- openssl-1.0.1e/ssl/ssl3.h.fallback-scsv 2014-10-15 14:45:25.570915303 +0200 ++++ openssl-1.0.1e/ssl/ssl3.h 2014-10-15 14:45:25.598915935 +0200 @@ -128,9 +128,14 @@ extern "C" { #endif @@ -308,8 +308,8 @@ diff -up openssl-1.0.1e/ssl/ssl3.h.fallback-scsv openssl-1.0.1e/ssl/ssl3.h #define SSL3_CK_RSA_NULL_SHA 0x03000002 #define SSL3_CK_RSA_RC4_40_MD5 0x03000003 diff -up openssl-1.0.1e/ssl/s2_lib.c.fallback-scsv openssl-1.0.1e/ssl/s2_lib.c ---- openssl-1.0.1e/ssl/s2_lib.c.fallback-scsv 2014-10-15 14:39:30.901908495 +0200 -+++ openssl-1.0.1e/ssl/s2_lib.c 2014-10-15 14:39:30.975910166 +0200 +--- openssl-1.0.1e/ssl/s2_lib.c.fallback-scsv 2014-10-15 14:45:25.526914309 +0200 ++++ openssl-1.0.1e/ssl/s2_lib.c 2014-10-15 14:45:25.598915935 +0200 @@ -391,6 +391,8 @@ long ssl2_ctrl(SSL *s, int cmd, long lar case SSL_CTRL_GET_SESSION_REUSED: ret=s->hit; @@ -330,7 +330,7 @@ diff -up openssl-1.0.1e/ssl/s2_lib.c.fallback-scsv openssl-1.0.1e/ssl/s2_lib.c p[2]=((unsigned char)(l ))&0xFF; diff -up openssl-1.0.1e/ssl/s23_clnt.c.fallback-scsv openssl-1.0.1e/ssl/s23_clnt.c --- openssl-1.0.1e/ssl/s23_clnt.c.fallback-scsv 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/s23_clnt.c 2014-10-15 14:39:30.975910166 +0200 ++++ 
openssl-1.0.1e/ssl/s23_clnt.c 2014-10-15 14:45:25.598915935 +0200 @@ -715,6 +715,9 @@ static int ssl23_get_server_hello(SSL *s goto err; } @@ -342,8 +342,8 @@ diff -up openssl-1.0.1e/ssl/s23_clnt.c.fallback-scsv openssl-1.0.1e/ssl/s23_clnt { /* fatal alert */ diff -up openssl-1.0.1e/ssl/s23_srvr.c.fallback-scsv openssl-1.0.1e/ssl/s23_srvr.c ---- openssl-1.0.1e/ssl/s23_srvr.c.fallback-scsv 2014-10-15 14:39:30.966909962 +0200 -+++ openssl-1.0.1e/ssl/s23_srvr.c 2014-10-15 14:39:30.976910188 +0200 +--- openssl-1.0.1e/ssl/s23_srvr.c.fallback-scsv 2014-10-15 14:45:25.584915619 +0200 ++++ openssl-1.0.1e/ssl/s23_srvr.c 2014-10-15 14:45:25.598915935 +0200 @@ -421,6 +421,9 @@ int ssl23_get_client_hello(SSL *s) } } @@ -356,7 +356,7 @@ diff -up openssl-1.0.1e/ssl/s23_srvr.c.fallback-scsv openssl-1.0.1e/ssl/s23_srvr { diff -up openssl-1.0.1e/ssl/s3_enc.c.fallback-scsv openssl-1.0.1e/ssl/s3_enc.c --- openssl-1.0.1e/ssl/s3_enc.c.fallback-scsv 2013-02-11 16:26:04.000000000 +0100 -+++ openssl-1.0.1e/ssl/s3_enc.c 2014-10-15 14:39:30.976910188 +0200 ++++ openssl-1.0.1e/ssl/s3_enc.c 2014-10-15 14:45:25.598915935 +0200 @@ -892,7 +892,7 @@ int ssl3_alert_code(int code) case SSL_AD_BAD_CERTIFICATE_STATUS_RESPONSE: return(SSL3_AD_HANDSHAKE_FAILURE); case SSL_AD_BAD_CERTIFICATE_HASH_VALUE: return(SSL3_AD_HANDSHAKE_FAILURE); @@ -367,9 +367,9 @@ diff -up openssl-1.0.1e/ssl/s3_enc.c.fallback-scsv openssl-1.0.1e/ssl/s3_enc.c } - diff -up openssl-1.0.1e/ssl/s3_lib.c.fallback-scsv openssl-1.0.1e/ssl/s3_lib.c ---- openssl-1.0.1e/ssl/s3_lib.c.fallback-scsv 2014-10-15 14:39:30.941909398 +0200 -+++ openssl-1.0.1e/ssl/s3_lib.c 2014-10-15 14:39:30.976910188 +0200 -@@ -3388,6 +3388,33 @@ long ssl3_ctrl(SSL *s, int cmd, long lar +--- openssl-1.0.1e/ssl/s3_lib.c.fallback-scsv 2014-10-15 14:45:25.590915754 +0200 ++++ openssl-1.0.1e/ssl/s3_lib.c 2014-10-15 14:45:25.599915957 +0200 +@@ -3394,6 +3394,33 @@ long ssl3_ctrl(SSL *s, int cmd, long lar EVP_PKEY_free(ptmp); return 0; } @@ -403,7 +403,7 @@ diff -up openssl-1.0.1e/ssl/s3_lib.c.fallback-scsv openssl-1.0.1e/ssl/s3_lib.c default: break; } -@@ -3747,6 +3774,7 @@ long ssl3_ctx_callback_ctrl(SSL_CTX *ctx +@@ -3759,6 +3786,7 @@ long ssl3_ctx_callback_ctrl(SSL_CTX *ctx break; #endif #endif @@ -411,14 +411,14 @@ diff -up openssl-1.0.1e/ssl/s3_lib.c.fallback-scsv openssl-1.0.1e/ssl/s3_lib.c default: return(0); } -@@ -4317,4 +4345,3 @@ long ssl_get_algorithm2(SSL *s) +@@ -4337,4 +4365,3 @@ long ssl_get_algorithm2(SSL *s) return SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256; return alg2; } - diff -up openssl-1.0.1e/ssl/tls1.h.fallback-scsv openssl-1.0.1e/ssl/tls1.h ---- openssl-1.0.1e/ssl/tls1.h.fallback-scsv 2014-10-15 14:39:30.775905650 +0200 -+++ openssl-1.0.1e/ssl/tls1.h 2014-10-15 14:39:30.976910188 +0200 +--- openssl-1.0.1e/ssl/tls1.h.fallback-scsv 2014-10-15 14:45:25.382911058 +0200 ++++ openssl-1.0.1e/ssl/tls1.h 2014-10-15 14:45:25.599915957 +0200 @@ -159,17 +159,19 @@ extern "C" { #define TLS1_ALLOW_EXPERIMENTAL_CIPHERSUITES 0 @@ -454,8 +454,8 @@ diff -up openssl-1.0.1e/ssl/tls1.h.fallback-scsv openssl-1.0.1e/ssl/tls1.h #define TLS1_AD_NO_RENEGOTIATION 100 /* codes 110-114 are from RFC3546 */ diff -up openssl-1.0.1e/ssl/t1_enc.c.fallback-scsv openssl-1.0.1e/ssl/t1_enc.c ---- openssl-1.0.1e/ssl/t1_enc.c.fallback-scsv 2014-10-15 14:39:30.936909285 +0200 -+++ openssl-1.0.1e/ssl/t1_enc.c 2014-10-15 14:39:30.977910211 +0200 +--- openssl-1.0.1e/ssl/t1_enc.c.fallback-scsv 2014-10-15 14:45:25.557915009 +0200 ++++ openssl-1.0.1e/ssl/t1_enc.c 2014-10-15 14:45:25.599915957 +0200 @@ -1265,6 
+1265,7 @@ int tls1_alert_code(int code) case SSL_AD_BAD_CERTIFICATE_STATUS_RESPONSE: return(TLS1_AD_BAD_CERTIFICATE_STATUS_RESPONSE); case SSL_AD_BAD_CERTIFICATE_HASH_VALUE: return(TLS1_AD_BAD_CERTIFICATE_HASH_VALUE); diff --git a/SOURCES/openssl-1.0.1e-fips-ec.patch b/SOURCES/openssl-1.0.1e-fips-ec.patch index 7287dae..e1f648c 100644 --- a/SOURCES/openssl-1.0.1e-fips-ec.patch +++ b/SOURCES/openssl-1.0.1e-fips-ec.patch @@ -241,7 +241,7 @@ diff -up openssl-1.0.1e/crypto/ec/ec_key.c.fips-ec openssl-1.0.1e/crypto/ec/ec_k + + EVP_PKEY_set1_EC_KEY(pk, key); + -+ if (fips_pkey_signature_test(pk, tbs, 0, NULL, 0, NULL, 0, NULL)) ++ if (fips_pkey_signature_test(pk, tbs, -1, NULL, 0, NULL, 0, NULL)) + ret = 1; + + err: diff --git a/SOURCES/openssl-1.0.1e-fips.patch b/SOURCES/openssl-1.0.1e-fips.patch index f5496a0..d1a7e7f 100644 --- a/SOURCES/openssl-1.0.1e-fips.patch +++ b/SOURCES/openssl-1.0.1e-fips.patch @@ -1008,7 +1008,7 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips openssl-1.0.1e/crypto/dsa/dsa_ + + EVP_PKEY_set1_DSA(pk, dsa); + -+ if (fips_pkey_signature_test(pk, tbs, 0, NULL, 0, NULL, 0, NULL)) ++ if (fips_pkey_signature_test(pk, tbs, -1, NULL, 0, NULL, 0, NULL)) + ret = 1; + + err: @@ -8660,7 +8660,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_aes_selftest.c.fips openssl-1.0.1e/cryp diff -up openssl-1.0.1e/crypto/fips/fips.c.fips openssl-1.0.1e/crypto/fips/fips.c --- openssl-1.0.1e/crypto/fips/fips.c.fips 2013-10-04 11:48:04.182694181 +0200 +++ openssl-1.0.1e/crypto/fips/fips.c 2013-10-04 11:48:04.182694181 +0200 -@@ -0,0 +1,489 @@ +@@ -0,0 +1,491 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. + * @@ -8990,6 +8990,8 @@ diff -up openssl-1.0.1e/crypto/fips/fips.c.fips openssl-1.0.1e/crypto/fips/fips. + } + free(buf); + free(hex); ++ } else { ++ rv = -1; + } + +end: @@ -18135,7 +18137,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_sha_selftest.c.fips openssl-1.0.1e/cryp diff -up openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c --- openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips 2013-10-04 11:48:04.188694316 +0200 +++ openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c 2013-10-04 11:48:04.188694316 +0200 -@@ -0,0 +1,180 @@ +@@ -0,0 +1,236 @@ +/* ==================================================================== + * Copyright (c) 2003 The OpenSSL Project. All rights reserved. + * @@ -18195,17 +18197,73 @@ diff -up openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1e/c +#ifndef FIPSCANISTER_O +int FIPS_selftest_failed() { return 0; } +void FIPS_selftest_check() {} -+void OPENSSL_cleanse(void *p,size_t len) {} +#endif + ++#ifdef OPENSSL_FIPS ++int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { return 0; }; ++int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { return 0; }; ++ +#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(__INTEL__) || \ + defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64) + +unsigned int OPENSSL_ia32cap_P[2]; ++unsigned long *OPENSSL_ia32cap_loc(void) ++{ if (sizeof(long)==4) ++ /* ++ * If 32-bit application pulls address of OPENSSL_ia32cap_P[0] ++ * clear second element to maintain the illusion that vector ++ * is 32-bit. 
++ */ ++ OPENSSL_ia32cap_P[1]=0; ++ return (unsigned long *)OPENSSL_ia32cap_P; ++} ++ ++#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY) ++#define OPENSSL_CPUID_SETUP ++#if defined(_WIN32) ++typedef unsigned __int64 IA32CAP; ++#else ++typedef unsigned long long IA32CAP; ++#endif ++void OPENSSL_cpuid_setup(void) ++{ static int trigger=0; ++ IA32CAP OPENSSL_ia32_cpuid(void); ++ IA32CAP vec; ++ char *env; ++ ++ if (trigger) return; ++ ++ trigger=1; ++ if ((env=getenv("OPENSSL_ia32cap"))) { ++ int off = (env[0]=='~')?1:0; ++#if defined(_WIN32) ++ if (!sscanf(env+off,"%I64i",&vec)) vec = strtoul(env+off,NULL,0); ++#else ++ if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0); ++#endif ++ if (off) vec = OPENSSL_ia32_cpuid()&~vec; ++ } ++ else ++ vec = OPENSSL_ia32_cpuid(); ++ ++ /* ++ * |(1<<10) sets a reserved bit to signal that variable ++ * was initialized already... This is to avoid interference ++ * with cpuid snippets in ELF .init segment. ++ */ ++ OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10); ++ OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32); ++} +#endif + -+#ifdef OPENSSL_FIPS ++#else ++unsigned long *OPENSSL_ia32cap_loc(void) { return NULL; } ++#endif ++int OPENSSL_NONPIC_relocated = 0; ++#if !defined(OPENSSL_CPUID_SETUP) && !defined(OPENSSL_CPUID_OBJ) ++void OPENSSL_cpuid_setup(void) {} ++#endif + +static void hmac_init(SHA256_CTX *md_ctx,SHA256_CTX *o_ctx, + const char *key) @@ -18911,7 +18969,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_test_suite.c.fips openssl-1.0.1e/crypto diff -up openssl-1.0.1e/crypto/fips/Makefile.fips openssl-1.0.1e/crypto/fips/Makefile --- openssl-1.0.1e/crypto/fips/Makefile.fips 2013-10-04 11:48:04.189694339 +0200 +++ openssl-1.0.1e/crypto/fips/Makefile 2013-10-04 11:48:04.189694339 +0200 -@@ -0,0 +1,340 @@ +@@ -0,0 +1,341 @@ +# +# OpenSSL/crypto/fips/Makefile +# @@ -19004,6 +19062,7 @@ diff -up openssl-1.0.1e/crypto/fips/Makefile.fips openssl-1.0.1e/crypto/fips/Mak + +$(EXE): $(PROGRAM).o + FIPS_SHA_ASM=""; for i in $(SHA1_ASM_OBJ) sha256.o; do FIPS_SHA_ASM="$$FIPS_SHA_ASM ../sha/$$i" ; done; \ ++ for i in $(CPUID_OBJ); do FIPS_SHA_ASM="$$FIPS_SHA_ASM ../$$i" ; done; \ + $(CC) -o $@ $(CFLAGS) $(PROGRAM).o $$FIPS_SHA_ASM + +# DO NOT DELETE THIS LINE -- make depend depends on it. 
diff --git a/SOURCES/openssl-1.0.1e-new-fips-reqs.patch b/SOURCES/openssl-1.0.1e-new-fips-reqs.patch index 055a087..40527ef 100644 --- a/SOURCES/openssl-1.0.1e-new-fips-reqs.patch +++ b/SOURCES/openssl-1.0.1e-new-fips-reqs.patch @@ -36,7 +36,7 @@ diff -up openssl-1.0.1e/crypto/dh/dh.h.fips-reqs openssl-1.0.1e/crypto/dh/dh.h #endif #define OPENSSL_DH_FIPS_MIN_MODULUS_BITS 1024 -+#define OPENSSL_DH_FIPS_MIN_MODULUS_BITS_GEN 2048 ++#define OPENSSL_DH_FIPS_MIN_MODULUS_BITS_GEN (getenv("OPENSSL_ENFORCE_MODULUS_BITS")?2048:1024) #define DH_FLAG_CACHE_MONT_P 0x01 #define DH_FLAG_NO_EXP_CONSTTIME 0x02 /* new with 0.9.7h; the built-in DH @@ -80,11 +80,12 @@ diff -up openssl-1.0.1e/crypto/dh/dh_check.c.fips-reqs openssl-1.0.1e/crypto/dh/ diff -up openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips-reqs openssl-1.0.1e/crypto/dsa/dsa_gen.c --- openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips-reqs 2013-12-18 12:17:09.749636636 +0100 +++ openssl-1.0.1e/crypto/dsa/dsa_gen.c 2013-12-18 12:17:09.799637708 +0100 -@@ -159,7 +159,6 @@ int dsa_builtin_paramgen(DSA *ret, size_ +@@ -159,7 +159,7 @@ int dsa_builtin_paramgen(DSA *ret, size_ } if (FIPS_module_mode() && - (bits != 1024 || qbits != 160) && ++ (getenv("OPENSSL_ENFORCE_MODULUS_BITS") || bits != 1024 || qbits != 160) && (bits != 2048 || qbits != 224) && (bits != 2048 || qbits != 256) && (bits != 3072 || qbits != 256)) @@ -95,7 +96,7 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa.h.fips-reqs openssl-1.0.1e/crypto/dsa/dsa #endif #define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS 1024 -+#define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS_GEN 2048 ++#define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS_GEN (getenv("OPENSSL_ENFORCE_MODULUS_BITS")?2048:1024) #define DSA_FLAG_CACHE_MONT_P 0x01 #define DSA_FLAG_NO_EXP_CONSTTIME 0x02 /* new with 0.9.7h; the built-in DSA @@ -124,6 +125,42 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips-reqs openssl-1.0.1e/crypto/dsa { DSAerr(DSA_F_DSA_BUILTIN_KEYGEN, DSA_R_KEY_SIZE_TOO_SMALL); goto err; +diff -up openssl-1.0.1e/crypto/fips/fips.c.fips-reqs openssl-1.0.1e/crypto/fips/fips.c +--- openssl-1.0.1e/crypto/fips/fips.c.fips-reqs 2014-09-24 16:38:43.000000000 +0200 ++++ openssl-1.0.1e/crypto/fips/fips.c 2014-09-24 16:37:28.000000000 +0200 +@@ -427,27 +427,25 @@ int FIPS_module_mode_set(int onoff, cons + ret = 0; + goto end; + } +- OPENSSL_ia32cap_P[0] |= (1<<28); /* set "shared cache" */ +- OPENSSL_ia32cap_P[1] &= ~(1<<(60-32)); /* clear AVX */ + } + #endif + +- if(!verify_checksums()) ++ if(!FIPS_selftest()) + { +- FIPSerr(FIPS_F_FIPS_MODULE_MODE_SET,FIPS_R_FINGERPRINT_DOES_NOT_MATCH); + fips_selftest_fail = 1; + ret = 0; + goto end; + } + +- if(FIPS_selftest()) +- fips_set_mode(onoff); +- else ++ if(!verify_checksums()) + { ++ FIPSerr(FIPS_F_FIPS_MODULE_MODE_SET,FIPS_R_FINGERPRINT_DOES_NOT_MATCH); + fips_selftest_fail = 1; + ret = 0; + goto end; + } ++ ++ fips_set_mode(onoff); + ret = 1; + goto end; + } diff -up openssl-1.0.1e/crypto/fips/fips_dh_selftest.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_dh_selftest.c --- openssl-1.0.1e/crypto/fips/fips_dh_selftest.c.fips-reqs 2013-12-18 17:06:36.575114314 +0100 +++ openssl-1.0.1e/crypto/fips/fips_dh_selftest.c 2013-12-18 17:26:14.409036334 +0100 @@ -397,29 +434,598 @@ diff -up openssl-1.0.1e/crypto/fips/fips_post.c.fips-reqs openssl-1.0.1e/crypto/ rv = 0; return rv; diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c ---- openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs 2013-12-18 12:17:09.761636893 +0100 -+++ openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c 
2013-12-18 12:17:09.799637708 +0100 -@@ -340,6 +340,42 @@ static const unsigned char kat_RSA_X931_ - 0x60, 0x83, 0x18, 0x88, 0xA3, 0xF5, 0x59, 0xC3 +--- openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs 2014-03-14 14:47:18.809259727 +0100 ++++ openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c 2014-03-14 15:37:26.295687852 +0100 +@@ -60,69 +60,113 @@ + #ifdef OPENSSL_FIPS + + static const unsigned char n[] = +-"\x00\xBB\xF8\x2F\x09\x06\x82\xCE\x9C\x23\x38\xAC\x2B\x9D\xA8\x71" +-"\xF7\x36\x8D\x07\xEE\xD4\x10\x43\xA4\x40\xD6\xB6\xF0\x74\x54\xF5" +-"\x1F\xB8\xDF\xBA\xAF\x03\x5C\x02\xAB\x61\xEA\x48\xCE\xEB\x6F\xCD" +-"\x48\x76\xED\x52\x0D\x60\xE1\xEC\x46\x19\x71\x9D\x8A\x5B\x8B\x80" +-"\x7F\xAF\xB8\xE0\xA3\xDF\xC7\x37\x72\x3E\xE6\xB4\xB7\xD9\x3A\x25" +-"\x84\xEE\x6A\x64\x9D\x06\x09\x53\x74\x88\x34\xB2\x45\x45\x98\x39" +-"\x4E\xE0\xAA\xB1\x2D\x7B\x61\xA5\x1F\x52\x7A\x9A\x41\xF6\xC1\x68" +-"\x7F\xE2\x53\x72\x98\xCA\x2A\x8F\x59\x46\xF8\xE5\xFD\x09\x1D\xBD" +-"\xCB"; ++"\x00\xc9\xd5\x6d\x9d\x90\xdb\x43\xd6\x02\xed\x96\x88\x13\x8a" ++"\xb2\xbf\x6e\xa1\x06\x10\xb2\x78\x37\xa7\x14\xa8\xff\xdd\x00" ++"\xdd\xb4\x93\xa0\x45\xcc\x96\x90\xed\xad\xa9\xdd\xc4\xd6\xca" ++"\x0c\xf0\xed\x4f\x72\x5e\x21\x49\x9a\x18\x12\x15\x8f\x90\x5a" ++"\xdb\xb6\x33\x99\xa3\xe6\xb4\xf0\xc4\x97\x21\x26\xbb\xe3\xba" ++"\xf2\xff\xa0\x72\xda\x89\x63\x8e\x8b\x3e\x08\x9d\x92\x2a\xbe" ++"\x16\xe1\x43\x15\xfc\x57\xc7\x1f\x09\x11\x67\x1c\xa9\x96\xd1" ++"\x8b\x3e\x80\x93\xc1\x59\xd0\x6d\x39\xf2\xac\x95\xcc\x10\x75" ++"\xe9\x31\x24\xd1\x43\xaf\x68\x52\x4b\xe7\x16\xd7\x49\x65\x6f" ++"\x26\xc0\x86\xad\xc0\x07\x0a\xc1\xe1\x2f\x87\x85\x86\x3b\xdc" ++"\x5a\x99\xbe\xe9\xf9\xb9\xe9\x82\x27\x51\x04\x15\xab\x06\x0e" ++"\x76\x5a\x28\x8d\x92\xbd\xc5\xb5\x7b\xa8\xdf\x4e\x47\xa2\xc1" ++"\xe7\x52\xbf\x47\xf7\x62\xe0\x3a\x6f\x4d\x6a\x4d\x4e\xd4\xb9" ++"\x59\x69\xfa\xb2\x14\xc1\xee\xe6\x2f\x95\xcd\x94\x72\xae\xe4" ++"\xdb\x18\x9a\xc4\xcd\x70\xbd\xee\x31\x16\xb7\x49\x65\xac\x40" ++"\x19\x0e\xb5\x6d\x83\xf1\x36\xbb\x08\x2f\x2e\x4e\x92\x62\xa4" ++"\xff\x50\xdb\x20\x45\xa2\xeb\x16\x7a\xf2\xd5\x28\xc1\xfd\x4e" ++"\x03\x71"; ++ + + static int corrupt_rsa; + + static int setrsakey(RSA *key) + { +- static const unsigned char e[] = "\x11"; ++ static const unsigned char e[] = "\x01\x00\x01"; + + static const unsigned char d[] = +-"\x00\xA5\xDA\xFC\x53\x41\xFA\xF2\x89\xC4\xB9\x88\xDB\x30\xC1\xCD" +-"\xF8\x3F\x31\x25\x1E\x06\x68\xB4\x27\x84\x81\x38\x01\x57\x96\x41" +-"\xB2\x94\x10\xB3\xC7\x99\x8D\x6B\xC4\x65\x74\x5E\x5C\x39\x26\x69" +-"\xD6\x87\x0D\xA2\xC0\x82\xA9\x39\xE3\x7F\xDC\xB8\x2E\xC9\x3E\xDA" +-"\xC9\x7F\xF3\xAD\x59\x50\xAC\xCF\xBC\x11\x1C\x76\xF1\xA9\x52\x94" +-"\x44\xE5\x6A\xAF\x68\xC5\x6C\x09\x2C\xD3\x8D\xC3\xBE\xF5\xD2\x0A" +-"\x93\x99\x26\xED\x4F\x74\xA1\x3E\xDD\xFB\xE1\xA1\xCE\xCC\x48\x94" +-"\xAF\x94\x28\xC2\xB7\xB8\x88\x3F\xE4\x46\x3A\x4B\xC8\x5B\x1C\xB3" +-"\xC1"; ++"\x36\x27\x3d\xb1\xf9\x1b\xdb\xa7\xa0\x41\x7f\x12\x23\xac\x23" ++"\x29\x99\xd5\x3a\x7b\x60\x67\x41\x07\x63\x53\xb4\xd2\xe7\x58" ++"\x95\x0a\xc7\x05\xf3\x4e\xb2\xb4\x12\xd4\x70\xdc\x4f\x85\x06" ++"\xd3\xdd\xd8\x63\x27\x3e\x67\x31\x21\x24\x39\x04\xbc\x06\xa4" ++"\xcc\xce\x2b\x7a\xfe\x7b\xad\xde\x11\x6e\xa3\xa5\xe6\x04\x53" ++"\x0e\xa3\x4e\x2d\xb4\x8f\x31\xbf\xca\x75\x25\x52\x02\x85\xde" ++"\x3d\xb2\x72\x43\xb2\x89\x8a\x9a\x34\x41\x26\x3f\x9a\x67\xbe" ++"\xa4\x96\x7b\x0e\x75\xba\xa6\x93\xd5\xb8\xd8\xb8\x57\xf2\x4b" ++"\x0f\x14\x81\xd1\x57\x4e\xf6\x45\x4c\xa6\x3b\xd0\x70\xca\xd3" ++"\x9d\x55\xde\x22\x05\xe7\x8e\x28\x4d\xee\x11\xcf\xb6\x67\x76" 
++"\x09\xd3\xe3\x3c\x13\xf9\x99\x34\x10\x7b\xec\x81\x38\xf0\xb6" ++"\x34\x9c\x9b\x50\x6f\x0b\x91\x81\x4d\x89\x94\x04\x7b\xf0\x3c" ++"\xf4\xb1\xb2\x00\x48\x8d\x5a\x8f\x88\x9e\xc5\xab\x3a\x9e\x44" ++"\x3f\x54\xe7\xd9\x6e\x47\xaa\xa1\xbd\x40\x46\x31\xf9\xf0\x34" ++"\xb6\x04\xe1\x2b\x5b\x73\x86\xdd\x3a\x92\x1b\x71\xc7\x3f\x32" ++"\xe5\xc3\xc2\xab\xa1\x7e\xbf\xa4\x52\xa0\xb0\x68\x90\xd1\x20" ++"\x12\x79\xe9\xd7\xc9\x40\xba\xf2\x19\xc7\xa5\x00\x92\x86\x0d" ++"\x01"; + + static const unsigned char p[] = +-"\x00\xEE\xCF\xAE\x81\xB1\xB9\xB3\xC9\x08\x81\x0B\x10\xA1\xB5\x60" +-"\x01\x99\xEB\x9F\x44\xAE\xF4\xFD\xA4\x93\xB8\x1A\x9E\x3D\x84\xF6" +-"\x32\x12\x4E\xF0\x23\x6E\x5D\x1E\x3B\x7E\x28\xFA\xE7\xAA\x04\x0A" +-"\x2D\x5B\x25\x21\x76\x45\x9D\x1F\x39\x75\x41\xBA\x2A\x58\xFB\x65" +-"\x99"; ++"\x00\xfc\x5c\x6e\x16\xce\x1f\x03\x7b\xcd\xf7\xb3\x72\xb2\x8f" ++"\x16\x72\xb8\x56\xae\xf7\xcd\x67\xd8\x4e\x7d\x07\xaf\xd5\x43" ++"\x26\xc3\x35\xbe\x43\x8f\x4e\x2f\x1c\x43\x4e\x6b\xd2\xb2\xec" ++"\x52\x6d\x97\x52\x2b\xcc\x5c\x3a\x6b\xf4\x14\xc6\x74\xda\x66" ++"\x38\x1c\x7a\x3f\x84\x2f\xe3\xf9\x5a\xb8\x65\x69\x46\x06\xa3" ++"\x37\x79\xb2\xa1\x5b\x58\xed\x5e\xa7\x5f\x8c\x65\x66\xbb\xd1" ++"\x24\x36\xe6\x37\xa7\x3d\x49\x77\x8a\x8c\x34\xd8\x69\x29\xf3" ++"\x4d\x58\x22\xb0\x51\x24\xb6\x40\xa8\x86\x59\x0a\xb7\xba\x5c" ++"\x97\xda\x57\xe8\x36\xda\x7a\x9c\xad"; + + static const unsigned char q[] = +-"\x00\xC9\x7F\xB1\xF0\x27\xF4\x53\xF6\x34\x12\x33\xEA\xAA\xD1\xD9" +-"\x35\x3F\x6C\x42\xD0\x88\x66\xB1\xD0\x5A\x0F\x20\x35\x02\x8B\x9D" +-"\x86\x98\x40\xB4\x16\x66\xB4\x2E\x92\xEA\x0D\xA3\xB4\x32\x04\xB5" +-"\xCF\xCE\x33\x52\x52\x4D\x04\x16\xA5\xA4\x41\xE7\x00\xAF\x46\x15" +-"\x03"; ++"\x00\xcc\xbe\x7b\x09\x69\x06\xee\x45\xbf\x88\x47\x38\xa8\xf8" ++"\x17\xe5\xb6\xba\x67\x55\xe3\xe8\x05\x8b\xb8\xe2\x53\xd6\x8e" ++"\xef\x2c\xe7\x4f\x4a\xf7\x4e\x26\x8d\x85\x0b\x3f\xec\xc3\x1c" ++"\xd4\xeb\xec\x6a\xc8\x72\x2a\x25\x7d\xfd\xa6\x77\x96\xf0\x1e" ++"\xcd\x28\x57\xf8\x37\x30\x75\x6b\xbd\xd4\x7b\x0c\x87\xc5\x6c" ++"\x87\x40\xa5\xbb\x27\x2c\x78\xc9\x74\x5a\x54\x5b\x0b\x30\x6f" ++"\x44\x4a\xfa\x71\xe4\x21\x61\x66\xf9\xee\x65\xde\x7c\x04\xd7" ++"\xfd\xa9\x15\x5b\x7f\xe2\x7a\xba\x69\x86\x72\xa6\x06\x8d\x9b" ++"\x90\x55\x60\x9e\x4c\x5d\xa9\xb6\x55"; ++ + + static const unsigned char dmp1[] = +-"\x54\x49\x4C\xA6\x3E\xBA\x03\x37\xE4\xE2\x40\x23\xFC\xD6\x9A\x5A" +-"\xEB\x07\xDD\xDC\x01\x83\xA4\xD0\xAC\x9B\x54\xB0\x51\xF2\xB1\x3E" +-"\xD9\x49\x09\x75\xEA\xB7\x74\x14\xFF\x59\xC1\xF7\x69\x2E\x9A\x2E" +-"\x20\x2B\x38\xFC\x91\x0A\x47\x41\x74\xAD\xC9\x3C\x1F\x67\xC9\x81"; ++"\x7a\xd6\x12\xd0\x0e\xec\x91\xa9\x85\x8b\xf8\x50\xf0\x11\x2e" ++"\x00\x11\x32\x40\x60\x66\x1f\x11\xee\xc2\x75\x27\x65\x4b\x16" ++"\x67\x16\x95\xd2\x14\xc3\x1d\xb3\x48\x1f\xb7\xe4\x0b\x2b\x74" ++"\xc3\xdb\x50\x27\xf9\x85\x3a\xfa\xa9\x08\x23\xc1\x65\x3d\x34" ++"\x3a\xc8\x56\x7a\x65\x45\x36\x6e\xae\x2a\xce\x9f\x43\x43\xd7" ++"\x10\xe9\x9e\x18\xf4\xa4\x35\xda\x8a\x6b\xb0\x3f\xdd\x53\xe3" ++"\xa8\xc5\x4e\x79\x9d\x1f\x51\x8c\xa2\xca\x66\x3c\x6a\x2a\xff" ++"\x8e\xd2\xf3\xb7\xcb\x82\xda\xde\x2c\xe6\xd2\x8c\xb3\xad\xb6" ++"\x4c\x95\x55\x76\xbd\xc9\xc8\xd1"; ++ + + static const unsigned char dmq1[] = +-"\x47\x1E\x02\x90\xFF\x0A\xF0\x75\x03\x51\xB7\xF8\x78\x86\x4C\xA9" +-"\x61\xAD\xBD\x3A\x8A\x7E\x99\x1C\x5C\x05\x56\xA9\x4C\x31\x46\xA7" +-"\xF9\x80\x3F\x8F\x6F\x8A\xE3\x42\xE9\x31\xFD\x8A\xE4\x7A\x22\x0D" +-"\x1B\x99\xA4\x95\x84\x98\x07\xFE\x39\xF9\x24\x5A\x98\x36\xDA\x3D"; +- ++"\x00\x83\x23\x1d\xbb\x11\x42\x17\x2b\x25\x5a\x2c\x03\xe6\x75" 
++"\xc1\x18\xa8\xc9\x0b\x96\xbf\xba\xc4\x92\x91\x80\xa5\x22\x2f" ++"\xba\x91\x90\x36\x01\x56\x15\x00\x2c\x74\xa2\x97\xf7\x15\xa1" ++"\x49\xdf\x32\x35\xd2\xdd\x0c\x91\xa6\xf8\xe7\xbe\x81\x36\x9b" ++"\x03\xdc\x6b\x3b\xd8\x5d\x79\x57\xe0\xe6\x4f\x49\xdf\x4c\x5c" ++"\x0e\xe5\x21\x41\x95\xfd\xad\xff\x9a\x3e\xa0\xf9\x0f\x59\x9e" ++"\x6a\xa7\x7b\x71\xa7\x24\x9a\x36\x52\xae\x97\x20\xc1\x5e\x78" ++"\xd9\x47\x8b\x1e\x67\xf2\xaf\x98\xe6\x2d\xef\x10\xd7\xf1\xab" ++"\x49\xee\xe5\x4b\x7e\xae\x1f\x1d\x61"; ++ ++ + static const unsigned char iqmp[] = +-"\x00\xB0\x6C\x4F\xDA\xBB\x63\x01\x19\x8D\x26\x5B\xDB\xAE\x94\x23" +-"\xB3\x80\xF2\x71\xF7\x34\x53\x88\x50\x93\x07\x7F\xCD\x39\xE2\x11" +-"\x9F\xC9\x86\x32\x15\x4F\x58\x83\xB1\x67\xA9\x67\xBF\x40\x2B\x4E" +-"\x9E\x2E\x0F\x96\x56\xE6\x98\xEA\x36\x66\xED\xFB\x25\x79\x80\x39" +-"\xF7"; ++"\x23\x96\xc1\x91\x17\x5e\x0a\x83\xd2\xdc\x7b\x69\xb2\x59\x1d" ++"\x33\x58\x52\x3f\x18\xc7\x09\x50\x1c\xb9\xa1\xbb\x4c\xa2\x38" ++"\x40\x4c\x9a\x8e\xfe\x9c\x90\x92\xd0\x71\x9f\x89\x99\x50\x91" ++"\x1f\x34\x8b\x74\x53\x11\x11\x4a\x70\xe2\xf7\x30\xd8\x8c\x80" ++"\xe1\xcc\x9f\xf1\x63\x17\x1a\x7d\x67\x29\x4c\xcb\x4e\x74\x7b" ++"\xe0\x3e\x9e\x2f\xf4\x67\x8f\xec\xb9\x5c\x00\x1e\x7e\xa2\x7b" ++"\x92\xc9\x6f\x4c\xe4\x0e\xf9\x48\x63\xcd\x50\x22\x5d\xbf\xb6" ++"\x9d\x01\x33\x6a\xf4\x50\xbe\x86\x98\x4f\xca\x3f\x3a\xfa\xcf" ++"\x07\x40\xc4\xaa\xad\xae\xbe\xbf"; + + key->n = BN_bin2bn(n, sizeof(n)-1, key->n); + if (corrupt_rsa) +- BN_set_bit(key->n, 1024); ++ BN_set_bit(key->n, 2048); + key->e = BN_bin2bn(e, sizeof(e)-1, key->e); + key->d = BN_bin2bn(d, sizeof(d)-1, key->d); + key->p = BN_bin2bn(p, sizeof(p)-1, key->p); +@@ -145,201 +189,291 @@ void FIPS_corrupt_rsa() + static const unsigned char kat_tbs[] = "OpenSSL FIPS 140-2 Public Key RSA KAT"; + + static const unsigned char kat_RSA_PSS_SHA1[] = { +- 0x2D, 0xAF, 0x6E, 0xC2, 0x98, 0xFB, 0x8A, 0xA1, 0xB9, 0x46, 0xDA, 0x0F, +- 0x01, 0x1E, 0x37, 0x93, 0xC2, 0x55, 0x27, 0xE4, 0x1D, 0xD2, 0x90, 0xBB, +- 0xF4, 0xBF, 0x4A, 0x74, 0x39, 0x51, 0xBB, 0xE8, 0x0C, 0xB7, 0xF8, 0xD3, +- 0xD1, 0xDF, 0xE7, 0xBE, 0x80, 0x05, 0xC3, 0xB5, 0xC7, 0x83, 0xD5, 0x4C, +- 0x7F, 0x49, 0xFB, 0x3F, 0x29, 0x9B, 0xE1, 0x12, 0x51, 0x60, 0xD0, 0xA7, +- 0x0D, 0xA9, 0x28, 0x56, 0x73, 0xD9, 0x07, 0xE3, 0x5E, 0x3F, 0x9B, 0xF5, +- 0xB6, 0xF3, 0xF2, 0x5E, 0x74, 0xC9, 0x83, 0x81, 0x47, 0xF0, 0xC5, 0x45, +- 0x0A, 0xE9, 0x8E, 0x38, 0xD7, 0x18, 0xC6, 0x2A, 0x0F, 0xF8, 0xB7, 0x31, +- 0xD6, 0x55, 0xE4, 0x66, 0x78, 0x81, 0xD4, 0xE6, 0xDB, 0x9F, 0xBA, 0xE8, +- 0x23, 0xB5, 0x7F, 0xDC, 0x08, 0xEA, 0xD5, 0x26, 0x1E, 0x20, 0x25, 0x84, +- 0x26, 0xC6, 0x79, 0xC9, 0x9B, 0x3D, 0x7E, 0xA9 ++ 0xC2, 0x80, 0x82, 0x56, 0xD8, 0xA7, 0xB2, 0x9C, 0xF5, 0xD6, 0x3C, 0xE3, ++ 0xBF, 0xE9, 0x3A, 0x53, 0x40, 0xAE, 0xF2, 0xA9, 0x6A, 0x39, 0x49, 0x5B, ++ 0x05, 0x7F, 0x67, 0x38, 0x2E, 0x1D, 0xE1, 0x93, 0x22, 0x65, 0x79, 0x84, ++ 0x68, 0xFA, 0xD8, 0xAF, 0xA1, 0x98, 0x61, 0x6F, 0x44, 0x27, 0xA6, 0x8B, ++ 0xCF, 0x0E, 0x13, 0xA9, 0xCE, 0xD7, 0x6C, 0xD2, 0x38, 0xB5, 0x16, 0xB9, ++ 0x66, 0x94, 0x48, 0xDE, 0x9E, 0x19, 0x3D, 0x6F, 0xB3, 0xA1, 0x9A, 0x19, ++ 0xDF, 0xFB, 0xAB, 0xA5, 0x9F, 0x38, 0xDA, 0xC9, 0x21, 0x8F, 0xCE, 0x98, ++ 0x01, 0x3A, 0xC8, 0xE0, 0xDF, 0xDA, 0xFC, 0xF0, 0xA6, 0x86, 0x29, 0xB5, ++ 0x7F, 0x61, 0xFB, 0xBA, 0xC5, 0x49, 0xB2, 0x7C, 0x6A, 0x26, 0x82, 0xC4, ++ 0x8F, 0xAA, 0x5B, 0x10, 0xD5, 0xEE, 0xA0, 0x55, 0x42, 0xEF, 0x32, 0x5A, ++ 0x3F, 0x55, 0xB3, 0x2C, 0x22, 0xE9, 0x65, 0xDA, 0x8D, 0x0A, 0xB9, 0x70, ++ 0x43, 0xCC, 0x3F, 0x64, 0x9C, 0xB5, 0x65, 0x49, 0xBD, 0x7F, 0x35, 0xC1, ++ 0x20, 0x85, 0x24, 
0xFE, 0xAA, 0x6B, 0x37, 0x04, 0xA1, 0x0E, 0x9D, 0x5C, ++ 0xBA, 0x7F, 0x14, 0x69, 0xC5, 0x93, 0xB2, 0x33, 0xC2, 0xC0, 0xC7, 0xDF, ++ 0x7E, 0x9E, 0xA4, 0xB0, 0xA0, 0x64, 0xD2, 0xAC, 0xFC, 0xFD, 0xFD, 0x99, ++ 0x8F, 0x6A, 0x40, 0x26, 0xC1, 0x2E, 0x4E, 0x8B, 0x33, 0xBE, 0xF1, 0x45, ++ 0x59, 0x8F, 0x33, 0x40, 0x1D, 0x2A, 0xD2, 0xF7, 0x50, 0x83, 0x89, 0xCF, ++ 0x94, 0xC6, 0xF8, 0x36, 0xF0, 0x84, 0x0B, 0x85, 0xA5, 0x02, 0xA9, 0x0F, ++ 0x41, 0x7A, 0x77, 0xA3, 0x2F, 0x47, 0x1E, 0x1D, 0xEC, 0xE6, 0xD3, 0x01, ++ 0x1E, 0x6F, 0x7A, 0x96, 0x50, 0x37, 0x37, 0x4B, 0x27, 0x52, 0x0B, 0xDC, ++ 0xDB, 0xC7, 0xA9, 0x31, 0xB2, 0x40, 0xEE, 0x60, 0x41, 0x26, 0x6A, 0x05, ++ 0xCE, 0x08, 0x1D, 0x89 + }; + + static const unsigned char kat_RSA_PSS_SHA224[] = { +- 0x39, 0x4A, 0x6A, 0x20, 0xBC, 0xE9, 0x33, 0xED, 0xEF, 0xC5, 0x58, 0xA7, +- 0xFE, 0x81, 0xC4, 0x36, 0x50, 0x9A, 0x2C, 0x82, 0x98, 0x08, 0x95, 0xFA, +- 0xB1, 0x9E, 0xD2, 0x55, 0x61, 0x87, 0x21, 0x59, 0x87, 0x7B, 0x1F, 0x57, +- 0x30, 0x9D, 0x0D, 0x4A, 0x06, 0xEB, 0x52, 0x37, 0x55, 0x54, 0x1C, 0x89, +- 0x83, 0x75, 0x59, 0x65, 0x64, 0x90, 0x2E, 0x16, 0xCC, 0x86, 0x05, 0xEE, +- 0xB1, 0xE6, 0x7B, 0xBA, 0x16, 0x75, 0x0D, 0x0C, 0x64, 0x0B, 0xAB, 0x22, +- 0x15, 0x78, 0x6B, 0x6F, 0xA4, 0xFB, 0x77, 0x40, 0x64, 0x62, 0xD1, 0xB5, +- 0x37, 0x1E, 0xE0, 0x3D, 0xA8, 0xF9, 0xD2, 0xBD, 0xAA, 0x38, 0x24, 0x49, +- 0x58, 0xD2, 0x74, 0x85, 0xF4, 0xB5, 0x93, 0x8E, 0xF5, 0x03, 0xEA, 0x2D, +- 0xC8, 0x52, 0xFA, 0xCF, 0x7E, 0x35, 0xB0, 0x6A, 0xAF, 0x95, 0xC0, 0x00, +- 0x54, 0x76, 0x3D, 0x0C, 0x9C, 0xB2, 0xEE, 0xC0 ++ 0xB4, 0x01, 0x93, 0x16, 0x05, 0xF6, 0xEB, 0xE2, 0xA4, 0xEB, 0x48, 0xAA, ++ 0x00, 0xF4, 0xA1, 0x99, 0x0A, 0xB4, 0xB6, 0x63, 0xE9, 0x68, 0xCA, 0xB3, ++ 0x13, 0xD7, 0x66, 0x6A, 0xCD, 0xCB, 0x33, 0x9F, 0xE5, 0x84, 0xE2, 0xC3, ++ 0x0B, 0x53, 0xE5, 0x8B, 0x96, 0x4B, 0xDB, 0x2D, 0x80, 0xA4, 0x1D, 0xE3, ++ 0x81, 0xDC, 0x52, 0x99, 0xBA, 0x9B, 0x6A, 0x9D, 0x48, 0x1F, 0x73, 0xF7, ++ 0xAC, 0x09, 0x13, 0xA1, 0x16, 0x2C, 0x60, 0xFB, 0xBC, 0x25, 0xF7, 0x53, ++ 0xD1, 0x04, 0x5A, 0x3F, 0x95, 0x09, 0x5E, 0xE5, 0xA2, 0x7D, 0xFC, 0x2A, ++ 0x51, 0x1D, 0x21, 0xCE, 0x2B, 0x4E, 0x1B, 0xB8, 0xCB, 0xDD, 0x24, 0xEE, ++ 0x99, 0x1D, 0x37, 0xDC, 0xED, 0x5F, 0x2F, 0x48, 0x5E, 0x33, 0x94, 0x06, ++ 0x19, 0xCD, 0x5A, 0x26, 0x85, 0x77, 0x9D, 0xAF, 0x86, 0x97, 0xC9, 0x08, ++ 0xD5, 0x81, 0x0E, 0xB8, 0x9F, 0xB6, 0xAF, 0x20, 0x72, 0xDC, 0x13, 0x4D, ++ 0x7A, 0xE4, 0x5C, 0x81, 0xDE, 0xC0, 0x3D, 0x19, 0x9C, 0x33, 0x11, 0x07, ++ 0xD5, 0xA9, 0x51, 0x67, 0xCD, 0xFD, 0x37, 0x61, 0x14, 0x9F, 0xE7, 0x70, ++ 0x18, 0x32, 0xC3, 0x34, 0x54, 0x0D, 0x4F, 0xB4, 0xAE, 0x9F, 0xEC, 0x64, ++ 0xD8, 0xB2, 0x16, 0xA4, 0xB2, 0x99, 0x92, 0xCB, 0x7F, 0x1F, 0x06, 0x17, ++ 0x5F, 0xA1, 0x07, 0x68, 0xAE, 0xA7, 0x2D, 0x03, 0x91, 0x2A, 0x9D, 0x69, ++ 0xC2, 0x9D, 0x90, 0xF7, 0xF9, 0x66, 0x5D, 0x13, 0xB7, 0x7F, 0xD3, 0x97, ++ 0x45, 0x97, 0x43, 0xD8, 0xCE, 0x3C, 0xF2, 0x98, 0x98, 0xDD, 0xE2, 0x2D, ++ 0xCF, 0xA1, 0xC4, 0x25, 0x46, 0x2E, 0xD2, 0xE5, 0x5F, 0xC6, 0x01, 0xC5, ++ 0x4F, 0x42, 0x2B, 0xDE, 0x0F, 0xEA, 0x4A, 0x4F, 0xC3, 0x5B, 0xDF, 0x9B, ++ 0x5D, 0x30, 0x18, 0x93, 0xD0, 0xDE, 0xC5, 0x09, 0xAA, 0x57, 0x57, 0xBD, ++ 0x2D, 0x84, 0x03, 0xB7 + }; + + static const unsigned char kat_RSA_PSS_SHA256[] = { +- 0x6D, 0x3D, 0xBE, 0x8F, 0x60, 0x6D, 0x25, 0x14, 0xF0, 0x31, 0xE3, 0x89, +- 0x00, 0x97, 0xFA, 0x99, 0x71, 0x28, 0xE5, 0x10, 0x25, 0x9A, 0xF3, 0x8F, +- 0x7B, 0xC5, 0xA8, 0x4A, 0x74, 0x51, 0x36, 0xE2, 0x8D, 0x7D, 0x73, 0x28, +- 0xC1, 0x77, 0xC6, 0x27, 0x97, 0x00, 0x8B, 0x00, 0xA3, 0x96, 0x73, 0x4E, +- 0x7D, 0x2E, 0x2C, 0x34, 0x68, 0x8C, 0x8E, 0xDF, 
0x9D, 0x49, 0x47, 0x05, +- 0xAB, 0xF5, 0x01, 0xD6, 0x81, 0x47, 0x70, 0xF5, 0x1D, 0x6D, 0x26, 0xBA, +- 0x2F, 0x7A, 0x54, 0x53, 0x4E, 0xED, 0x71, 0xD9, 0x5A, 0xF3, 0xDA, 0xB6, +- 0x0B, 0x47, 0x34, 0xAF, 0x90, 0xDC, 0xC8, 0xD9, 0x6F, 0x56, 0xCD, 0x9F, +- 0x21, 0xB7, 0x7E, 0xAD, 0x7C, 0x2F, 0x75, 0x50, 0x47, 0x12, 0xE4, 0x6D, +- 0x5F, 0xB7, 0x01, 0xDF, 0xC3, 0x11, 0x6C, 0xA9, 0x9E, 0x49, 0xB9, 0xF6, +- 0x72, 0xF4, 0xF6, 0xEF, 0x88, 0x1E, 0x2D, 0x1C ++ 0x38, 0xDA, 0x99, 0x51, 0x26, 0x38, 0xC6, 0x7F, 0xC4, 0x81, 0x57, 0x19, ++ 0x35, 0xC6, 0xF6, 0x1E, 0x90, 0x47, 0x20, 0x55, 0x47, 0x56, 0x26, 0xE9, ++ 0xF2, 0xA8, 0x39, 0x6C, 0xD5, 0xCD, 0xCB, 0x55, 0xFC, 0x0C, 0xC5, 0xCB, ++ 0xF7, 0x40, 0x17, 0x3B, 0xCF, 0xE4, 0x05, 0x03, 0x3B, 0xA0, 0xB2, 0xC9, ++ 0x0D, 0x5E, 0x48, 0x3A, 0xE9, 0xAD, 0x28, 0x71, 0x7D, 0x8F, 0x89, 0x16, ++ 0x59, 0x93, 0x35, 0xDC, 0x4D, 0x7B, 0xDF, 0x84, 0xE4, 0x68, 0xAA, 0x33, ++ 0xAA, 0xDC, 0x66, 0x50, 0xC8, 0xA9, 0x32, 0x12, 0xDC, 0xC6, 0x90, 0x49, ++ 0x0B, 0x75, 0xFF, 0x9B, 0x95, 0x00, 0x9A, 0x90, 0xE0, 0xD4, 0x0E, 0x67, ++ 0xAB, 0x3C, 0x47, 0x36, 0xC5, 0x2E, 0x1C, 0x46, 0xF0, 0x2D, 0xD3, 0x8B, ++ 0x42, 0x08, 0xDE, 0x0D, 0xB6, 0x2C, 0x86, 0xB0, 0x35, 0x71, 0x18, 0x6B, ++ 0x89, 0x67, 0xC0, 0x05, 0xAD, 0xF4, 0x1D, 0x62, 0x4E, 0x75, 0xEC, 0xD6, ++ 0xC2, 0xDB, 0x07, 0xB0, 0xB6, 0x8D, 0x15, 0xAD, 0xCD, 0xBF, 0xF5, 0x60, ++ 0x76, 0xAE, 0x48, 0xB8, 0x77, 0x7F, 0xC5, 0x01, 0xD9, 0x29, 0xBB, 0xD6, ++ 0x17, 0xA2, 0x20, 0x5A, 0xC0, 0x4A, 0x3B, 0x34, 0xC8, 0xB9, 0x39, 0xCF, ++ 0x06, 0x89, 0x95, 0x6F, 0xC7, 0xCA, 0xC4, 0xE4, 0x43, 0xDF, 0x5A, 0x23, ++ 0xE2, 0x89, 0xA3, 0x38, 0x78, 0x31, 0x38, 0xC6, 0xA4, 0x6F, 0x5F, 0x73, ++ 0x5A, 0xE5, 0x9E, 0x09, 0xE7, 0x6F, 0xD4, 0xF8, 0x3E, 0xB7, 0xB0, 0x56, ++ 0x9A, 0xF3, 0x65, 0xF0, 0xC2, 0xA6, 0x8A, 0x08, 0xBA, 0x44, 0xAC, 0x97, ++ 0xDE, 0xB4, 0x16, 0x83, 0xDF, 0xE3, 0xEE, 0x71, 0xFA, 0xF9, 0x51, 0x50, ++ 0x14, 0xDC, 0xFD, 0x6A, 0x82, 0x20, 0x68, 0x64, 0x7D, 0x4E, 0x82, 0x68, ++ 0xD7, 0x45, 0xFA, 0x6A, 0xE4, 0xE5, 0x29, 0x3A, 0x70, 0xFB, 0xE4, 0x62, ++ 0x2B, 0x31, 0xB9, 0x7D + }; + + static const unsigned char kat_RSA_PSS_SHA384[] = { +- 0x40, 0xFB, 0xA1, 0x21, 0xF4, 0xB2, 0x40, 0x9A, 0xB4, 0x31, 0xA8, 0xF2, +- 0xEC, 0x1C, 0xC4, 0xC8, 0x7C, 0x22, 0x65, 0x9C, 0x57, 0x45, 0xCD, 0x5E, +- 0x86, 0x00, 0xF7, 0x25, 0x78, 0xDE, 0xDC, 0x7A, 0x71, 0x44, 0x9A, 0xCD, +- 0xAA, 0x25, 0xF4, 0xB2, 0xFC, 0xF0, 0x75, 0xD9, 0x2F, 0x78, 0x23, 0x7F, +- 0x6F, 0x02, 0xEF, 0xC1, 0xAF, 0xA6, 0x28, 0x16, 0x31, 0xDC, 0x42, 0x6C, +- 0xB2, 0x44, 0xE5, 0x4D, 0x66, 0xA2, 0xE6, 0x71, 0xF3, 0xAC, 0x4F, 0xFB, +- 0x91, 0xCA, 0xF5, 0x70, 0xEF, 0x6B, 0x9D, 0xA4, 0xEF, 0xD9, 0x3D, 0x2F, +- 0x3A, 0xBE, 0x89, 0x38, 0x59, 0x01, 0xBA, 0xDA, 0x32, 0xAD, 0x42, 0x89, +- 0x98, 0x8B, 0x39, 0x44, 0xF0, 0xFC, 0x38, 0xAC, 0x87, 0x1F, 0xCA, 0x6F, +- 0x48, 0xF6, 0xAE, 0xD7, 0x45, 0xEE, 0xAE, 0x88, 0x0E, 0x60, 0xF4, 0x55, +- 0x48, 0x44, 0xEE, 0x1F, 0x90, 0x18, 0x4B, 0xF1 ++ 0x99, 0x02, 0xC9, 0x1E, 0x31, 0x82, 0xB4, 0xE6, 0x1B, 0x32, 0xCE, 0x5D, ++ 0x41, 0x1D, 0x00, 0x2F, 0x04, 0x8B, 0xBD, 0x37, 0x79, 0xCF, 0x77, 0x03, ++ 0x05, 0x6A, 0x21, 0xC7, 0x8D, 0x24, 0x60, 0x49, 0x39, 0x58, 0xC5, 0x27, ++ 0x8F, 0xC5, 0x97, 0x4A, 0xB2, 0xE1, 0xD4, 0x36, 0x57, 0xBD, 0x43, 0xCC, ++ 0x7B, 0xCE, 0xF2, 0xA5, 0x30, 0xF8, 0x72, 0x14, 0xBB, 0xD0, 0x9F, 0xC1, ++ 0x49, 0xC8, 0x1C, 0xAF, 0xCD, 0x95, 0x78, 0x72, 0x25, 0xF9, 0x45, 0xC6, ++ 0x5B, 0x62, 0x5E, 0x01, 0xD7, 0x40, 0x5E, 0xC8, 0xCA, 0x0A, 0xF3, 0xBA, ++ 0x08, 0x07, 0x88, 0xCA, 0x49, 0x36, 0x84, 0x7D, 0xF6, 0xFC, 0x5A, 0xDB, ++ 0xFC, 0x50, 0xD3, 0xEB, 0x3D, 0x83, 
0xB0, 0xF5, 0x94, 0x5E, 0x88, 0xC3, ++ 0x82, 0xCD, 0x53, 0x40, 0x96, 0x18, 0x6B, 0x4A, 0x6C, 0x9C, 0xFE, 0xE5, ++ 0x3B, 0x75, 0xF9, 0xEB, 0xA5, 0x77, 0x11, 0xEF, 0x88, 0x1C, 0x25, 0x70, ++ 0x7D, 0x88, 0x5D, 0xC3, 0xCA, 0xE1, 0x49, 0x14, 0x90, 0xAD, 0xF2, 0x5E, ++ 0x49, 0xD7, 0x99, 0xA5, 0x7B, 0x77, 0x3B, 0x8E, 0xB8, 0xDB, 0xF1, 0x4C, ++ 0xD6, 0x9A, 0xDC, 0xE5, 0x7A, 0x1C, 0xE1, 0xCE, 0x9D, 0xF1, 0xF3, 0xA0, ++ 0x0A, 0x35, 0x52, 0x9D, 0xB9, 0x46, 0x94, 0x82, 0x0F, 0xF7, 0xB2, 0x62, ++ 0x51, 0x70, 0x75, 0xD2, 0x37, 0x96, 0x67, 0x2F, 0xD0, 0x22, 0xD8, 0x07, ++ 0x8D, 0x69, 0x9E, 0x6D, 0x0B, 0x40, 0x4F, 0x70, 0xEC, 0x0B, 0xCA, 0x88, ++ 0x80, 0x8D, 0x9A, 0xF4, 0xF9, 0x18, 0x50, 0x27, 0x08, 0xFA, 0xCC, 0xC7, ++ 0x3F, 0xE4, 0x84, 0x83, 0xA1, 0xB6, 0x1D, 0x23, 0x34, 0xFE, 0x48, 0xE5, ++ 0xE3, 0xAE, 0x4D, 0x98, 0xBC, 0xA6, 0x8A, 0x9F, 0xFD, 0x4D, 0xDB, 0x9D, ++ 0xF7, 0xEB, 0x4E, 0xB6, 0x6F, 0x25, 0xEA, 0x7A, 0xE9, 0x85, 0xB2, 0xEF, ++ 0x90, 0xD2, 0xA6, 0x2B + }; + + static const unsigned char kat_RSA_PSS_SHA512[] = { +- 0x07, 0x1E, 0xD8, 0xD5, 0x05, 0xE8, 0xE6, 0xE6, 0x57, 0xAE, 0x63, 0x8C, +- 0xC6, 0x83, 0xB7, 0xA0, 0x59, 0xBB, 0xF2, 0xC6, 0x8F, 0x12, 0x53, 0x9A, +- 0x9B, 0x54, 0x9E, 0xB3, 0xC1, 0x1D, 0x23, 0x4D, 0x51, 0xED, 0x9E, 0xDD, +- 0x4B, 0xF3, 0x46, 0x9B, 0x6B, 0xF6, 0x7C, 0x24, 0x60, 0x79, 0x23, 0x39, +- 0x01, 0x1C, 0x51, 0xCB, 0xD8, 0xE9, 0x9A, 0x01, 0x67, 0x5F, 0xFE, 0xD7, +- 0x7C, 0xE3, 0x7F, 0xED, 0xDB, 0x87, 0xBB, 0xF0, 0x3D, 0x78, 0x55, 0x61, +- 0x57, 0xE3, 0x0F, 0xE3, 0xD2, 0x9D, 0x0C, 0x2A, 0x20, 0xB0, 0x85, 0x13, +- 0xC5, 0x47, 0x34, 0x0D, 0x32, 0x15, 0xC8, 0xAE, 0x9A, 0x6A, 0x39, 0x63, +- 0x2D, 0x60, 0xF5, 0x4C, 0xDF, 0x8A, 0x48, 0x4B, 0xBF, 0xF4, 0xA8, 0xFE, +- 0x76, 0xF2, 0x32, 0x1B, 0x9C, 0x7C, 0xCA, 0xFE, 0x7F, 0x80, 0xC2, 0x88, +- 0x5C, 0x97, 0x70, 0xB4, 0x26, 0xC9, 0x14, 0x8B ++ 0x3F, 0x83, 0x43, 0x78, 0x25, 0xBE, 0x81, 0xB2, 0x6E, 0x78, 0x11, 0x32, ++ 0xD0, 0x88, 0x05, 0x53, 0x95, 0xED, 0x81, 0x12, 0xCE, 0x50, 0xD9, 0x06, ++ 0x42, 0x89, 0xA0, 0x55, 0x7A, 0x05, 0x13, 0x94, 0x35, 0x9B, 0xCA, 0x5D, ++ 0xCB, 0xB2, 0x32, 0xE1, 0x04, 0x99, 0xEC, 0xE7, 0xA6, 0x69, 0x4D, 0x2B, ++ 0xC1, 0x57, 0x13, 0x48, 0x0D, 0x6B, 0x4D, 0x83, 0x28, 0x06, 0x79, 0x9D, ++ 0xB4, 0x70, 0xCE, 0xC0, 0xFC, 0x3B, 0x69, 0xB3, 0x91, 0x54, 0xA9, 0x44, ++ 0x2E, 0xDA, 0x4A, 0xC5, 0xC2, 0x99, 0xF0, 0xDE, 0xCA, 0x77, 0x99, 0x6B, ++ 0x0C, 0x79, 0xE5, 0x29, 0x74, 0x83, 0x69, 0xEA, 0xB8, 0x72, 0x30, 0x3D, ++ 0x7A, 0x30, 0xE1, 0x03, 0x7B, 0x09, 0xE6, 0x11, 0xC0, 0xDC, 0xFF, 0xFD, ++ 0xBD, 0xEC, 0x9C, 0xCC, 0x46, 0x7B, 0x4C, 0x4C, 0x59, 0xBE, 0x82, 0x7C, ++ 0xF5, 0x60, 0x5A, 0xC3, 0xE8, 0xA8, 0x8A, 0x38, 0x9E, 0x01, 0x57, 0xF1, ++ 0x79, 0x3A, 0x7C, 0xA3, 0x9F, 0x12, 0x1A, 0x4F, 0x2E, 0xA2, 0xE5, 0x0A, ++ 0xAB, 0xC0, 0xF4, 0xA5, 0xE3, 0x5F, 0x89, 0x1C, 0x8F, 0xA4, 0x5E, 0xCE, ++ 0x0D, 0x91, 0x05, 0x1B, 0x17, 0x62, 0x48, 0xFE, 0xA5, 0x4C, 0xEF, 0x2D, ++ 0x28, 0xF1, 0x5E, 0xE6, 0xD1, 0x30, 0x89, 0x0A, 0xAD, 0x18, 0xAF, 0x6F, ++ 0x04, 0x09, 0x36, 0x9A, 0xFF, 0xCA, 0xA1, 0xA7, 0x05, 0x7F, 0xD4, 0xBF, ++ 0x3A, 0xB5, 0x42, 0x6D, 0xE9, 0x07, 0x29, 0x65, 0x8B, 0xAD, 0x4D, 0x0F, ++ 0x22, 0xE1, 0x59, 0x43, 0x68, 0x87, 0xA8, 0x8B, 0xBC, 0x69, 0xA1, 0x94, ++ 0x22, 0x3E, 0x8A, 0x49, 0xE8, 0xA3, 0x6F, 0xC2, 0x93, 0x58, 0xE7, 0xAE, ++ 0xC9, 0x1F, 0xCF, 0x61, 0x93, 0xFC, 0xC1, 0xF6, 0xF3, 0x27, 0x7F, 0x0A, ++ 0x90, 0xE0, 0x65, 0x32, 0x57, 0x47, 0xE2, 0xED, 0x08, 0x59, 0xA6, 0xF0, ++ 0x17, 0x2C, 0x13, 0xE0 + }; + + static const unsigned char kat_RSA_SHA1[] = { +- 0x71, 0xEE, 0x1A, 0xC0, 0xFE, 0x01, 0x93, 0x54, 0x79, 0x5C, 0xF2, 0x4C, 
+- 0x4A, 0xFD, 0x1A, 0x05, 0x8F, 0x64, 0xB1, 0x6D, 0x61, 0x33, 0x8D, 0x9B, +- 0xE7, 0xFD, 0x60, 0xA3, 0x83, 0xB5, 0xA3, 0x51, 0x55, 0x77, 0x90, 0xCF, +- 0xDC, 0x22, 0x37, 0x8E, 0xD0, 0xE1, 0xAE, 0x09, 0xE3, 0x3D, 0x1E, 0xF8, +- 0x80, 0xD1, 0x8B, 0xC2, 0xEC, 0x0A, 0xD7, 0x6B, 0x88, 0x8B, 0x8B, 0xA1, +- 0x20, 0x22, 0xBE, 0x59, 0x5B, 0xE0, 0x23, 0x24, 0xA1, 0x49, 0x30, 0xBA, +- 0xA9, 0x9E, 0xE8, 0xB1, 0x8A, 0x62, 0x16, 0xBF, 0x4E, 0xCA, 0x2E, 0x4E, +- 0xBC, 0x29, 0xA8, 0x67, 0x13, 0xB7, 0x9F, 0x1D, 0x04, 0x44, 0xE5, 0x5F, +- 0x35, 0x07, 0x11, 0xBC, 0xED, 0x19, 0x37, 0x21, 0xCF, 0x23, 0x48, 0x1F, +- 0x72, 0x05, 0xDE, 0xE6, 0xE8, 0x7F, 0x33, 0x8A, 0x76, 0x4B, 0x2F, 0x95, +- 0xDF, 0xF1, 0x5F, 0x84, 0x80, 0xD9, 0x46, 0xB4 ++ 0x3B, 0x60, 0x4B, 0xFC, 0x54, 0x28, 0x23, 0xE6, 0x2F, 0x05, 0x04, 0xBA, ++ 0x9D, 0xE4, 0x3C, 0xB8, 0x5B, 0x60, 0x5C, 0xCD, 0x9D, 0xEA, 0xC3, 0x4C, ++ 0xC2, 0x33, 0xE6, 0xC6, 0x21, 0x48, 0x76, 0xEC, 0xB2, 0xF5, 0x11, 0xDE, ++ 0x44, 0xB4, 0xAF, 0x16, 0x11, 0xC3, 0x18, 0x16, 0xB3, 0x69, 0xBB, 0x94, ++ 0xED, 0xE8, 0xB3, 0x9E, 0xB1, 0x43, 0x8E, 0xCE, 0xB4, 0x34, 0x9B, 0x08, ++ 0x22, 0xAF, 0x31, 0x73, 0xB5, 0xFA, 0x11, 0x7E, 0x8F, 0x13, 0x52, 0xEC, ++ 0xC9, 0x03, 0xEE, 0x0D, 0x2B, 0x91, 0x32, 0xF2, 0x8E, 0xDF, 0x02, 0xE0, ++ 0x0A, 0x47, 0xD2, 0x0A, 0x51, 0x00, 0x1A, 0x30, 0x6F, 0x0C, 0xB3, 0x54, ++ 0x64, 0x20, 0x90, 0x0C, 0x01, 0xBE, 0xC0, 0x42, 0x8C, 0x5D, 0x18, 0x6F, ++ 0x32, 0x75, 0x45, 0x7B, 0x1C, 0x04, 0xA2, 0x9F, 0x84, 0xD7, 0xF5, 0x3A, ++ 0x95, 0xD4, 0xE8, 0x8D, 0xEC, 0x99, 0xEF, 0x18, 0x5E, 0x64, 0xD3, 0xAF, ++ 0xF8, 0xD4, 0xFF, 0x3C, 0x87, 0xA0, 0x3F, 0xC7, 0x22, 0x05, 0xFD, 0xFD, ++ 0x29, 0x8A, 0x28, 0xDA, 0xA9, 0x8A, 0x8B, 0x23, 0x62, 0x9D, 0x42, 0xB8, ++ 0x4A, 0x76, 0x0D, 0x9F, 0x9A, 0xE0, 0xE6, 0xDD, 0xAD, 0x5E, 0x5F, 0xD5, ++ 0x32, 0xE9, 0x4B, 0x97, 0x7D, 0x62, 0x0A, 0xB3, 0xBE, 0xF2, 0x8C, 0x1F, ++ 0x2B, 0x22, 0x06, 0x15, 0x33, 0x71, 0xED, 0x9B, 0xA0, 0x82, 0xCE, 0xBF, ++ 0x3B, 0x08, 0x5F, 0xA7, 0x20, 0x94, 0x09, 0xEB, 0x82, 0xA5, 0x41, 0x60, ++ 0xF1, 0x08, 0xEB, 0x8D, 0xCC, 0x8D, 0xC9, 0x52, 0x0A, 0xAF, 0xF4, 0xF9, ++ 0x9F, 0x82, 0xD8, 0x0B, 0x75, 0x5E, 0xE4, 0xAF, 0x65, 0x96, 0xAF, 0xFC, ++ 0x33, 0xBF, 0x9F, 0x3E, 0xA4, 0x7B, 0x86, 0xC7, 0xF7, 0x47, 0xAB, 0x37, ++ 0x05, 0xD6, 0x0D, 0x31, 0x72, 0x8C, 0x80, 0x1E, 0xA9, 0x54, 0xFC, 0xDF, ++ 0x27, 0x90, 0xE2, 0x01 + }; + + static const unsigned char kat_RSA_SHA224[] = { +- 0x62, 0xAA, 0x79, 0xA9, 0x18, 0x0E, 0x5F, 0x8C, 0xBB, 0xB7, 0x15, 0xF9, +- 0x25, 0xBB, 0xFA, 0xD4, 0x3A, 0x34, 0xED, 0x9E, 0xA0, 0xA9, 0x18, 0x8D, +- 0x5B, 0x55, 0x9A, 0x7E, 0x1E, 0x08, 0x08, 0x60, 0xC5, 0x1A, 0xC5, 0x89, +- 0x08, 0xE2, 0x1B, 0xBD, 0x62, 0x50, 0x17, 0x76, 0x30, 0x2C, 0x9E, 0xCD, +- 0xA4, 0x02, 0xAD, 0xB1, 0x6D, 0x44, 0x6D, 0xD5, 0xC6, 0x45, 0x41, 0xE5, +- 0xEE, 0x1F, 0x8D, 0x7E, 0x08, 0x16, 0xA6, 0xE1, 0x5E, 0x0B, 0xA9, 0xCC, +- 0xDB, 0x59, 0x55, 0x87, 0x09, 0x25, 0x70, 0x86, 0x84, 0x02, 0xC6, 0x3B, +- 0x0B, 0x44, 0x4C, 0x46, 0x95, 0xF4, 0xF8, 0x5A, 0x91, 0x28, 0x3E, 0xB2, +- 0x58, 0x2E, 0x06, 0x45, 0x49, 0xE0, 0x92, 0xE2, 0xC0, 0x66, 0xE6, 0x35, +- 0xD9, 0x79, 0x7F, 0x17, 0x5E, 0x02, 0x73, 0x04, 0x77, 0x82, 0xE6, 0xDC, +- 0x40, 0x21, 0x89, 0x8B, 0x37, 0x3E, 0x1E, 0x8D ++ 0xA2, 0xD8, 0x42, 0x53, 0xDD, 0xBF, 0x1F, 0x6B, 0x07, 0xE0, 0x60, 0x86, ++ 0x5A, 0x60, 0x06, 0x8F, 0x44, 0xD9, 0xB0, 0x4A, 0xAA, 0x90, 0x71, 0xB8, ++ 0xB2, 0xBC, 0x30, 0x41, 0x50, 0xBB, 0xFD, 0x46, 0x98, 0x4D, 0xC0, 0x89, ++ 0x57, 0x85, 0x8A, 0x97, 0x49, 0x25, 0xA8, 0x0C, 0x69, 0x70, 0x19, 0x39, ++ 0x66, 0x24, 0xB4, 0x69, 0x47, 0xD2, 0x7C, 0xDE, 0x2D, 0x37, 0x59, 
0xB3, ++ 0xE3, 0xC7, 0x6B, 0xDD, 0xBE, 0xE1, 0xE6, 0x28, 0x9A, 0x8D, 0x42, 0x3E, ++ 0x28, 0x01, 0xD7, 0x03, 0xC9, 0x73, 0xC3, 0x6B, 0x03, 0xEC, 0x1E, 0xF8, ++ 0x53, 0x8B, 0x52, 0x42, 0x89, 0x55, 0xB7, 0x87, 0xA9, 0x94, 0xC2, 0xB4, ++ 0x4B, 0x76, 0xF5, 0x61, 0x47, 0xE1, 0x44, 0x7B, 0xEC, 0xB4, 0x25, 0x66, ++ 0xC0, 0xFF, 0xEB, 0x86, 0x24, 0xAA, 0xA8, 0x72, 0xC7, 0xFB, 0xFB, 0xF6, ++ 0x84, 0xA7, 0x5B, 0xD4, 0x87, 0xE5, 0x84, 0x56, 0x1E, 0x4C, 0xE5, 0xBC, ++ 0x87, 0x94, 0xAC, 0x9C, 0x1B, 0x3D, 0xF7, 0xD4, 0x36, 0x85, 0x9F, 0xC9, ++ 0xF6, 0x43, 0x3F, 0xB6, 0x25, 0x33, 0x48, 0x0F, 0xE5, 0x7C, 0xCD, 0x53, ++ 0x48, 0xEB, 0x02, 0x11, 0xB9, 0x9E, 0xC3, 0xB4, 0xE1, 0x54, 0xD6, 0xAA, ++ 0x1A, 0x9E, 0x10, 0xE1, 0x27, 0x25, 0xF2, 0xE1, 0xAB, 0xAB, 0x6C, 0x45, ++ 0x61, 0xD5, 0xA3, 0x6C, 0xB6, 0x33, 0x52, 0xAE, 0x3D, 0xFD, 0x22, 0xFC, ++ 0x3A, 0xAB, 0x63, 0x94, 0xB5, 0x3A, 0x69, 0x11, 0xAC, 0x99, 0x4F, 0x33, ++ 0x67, 0x0A, 0x1A, 0x70, 0x1E, 0xB9, 0xE2, 0x26, 0x27, 0x68, 0xEA, 0xF5, ++ 0x97, 0x55, 0xAC, 0x83, 0x6A, 0x40, 0x3B, 0x56, 0xAE, 0x13, 0x88, 0xE8, ++ 0x98, 0x72, 0x52, 0x91, 0x7F, 0x78, 0x0A, 0x18, 0xD4, 0x44, 0x78, 0x83, ++ 0x0D, 0x44, 0x77, 0xA6, 0xF3, 0x04, 0xF1, 0x8C, 0xBC, 0x2F, 0xF9, 0x5B, ++ 0xDB, 0x70, 0x00, 0xF6 + }; + + static const unsigned char kat_RSA_SHA256[] = { +- 0x0D, 0x55, 0xE2, 0xAA, 0x81, 0xDB, 0x8E, 0x82, 0x05, 0x17, 0xA5, 0x23, +- 0xE7, 0x3B, 0x1D, 0xAF, 0xFB, 0x8C, 0xD0, 0x81, 0x20, 0x7B, 0xAA, 0x23, +- 0x92, 0x87, 0x8C, 0xD1, 0x53, 0x85, 0x16, 0xDC, 0xBE, 0xAD, 0x6F, 0x35, +- 0x98, 0x2D, 0x69, 0x84, 0xBF, 0xD9, 0x8A, 0x01, 0x17, 0x58, 0xB2, 0x6E, +- 0x2C, 0x44, 0x9B, 0x90, 0xF1, 0xFB, 0x51, 0xE8, 0x6A, 0x90, 0x2D, 0x18, +- 0x0E, 0xC0, 0x90, 0x10, 0x24, 0xA9, 0x1D, 0xB3, 0x58, 0x7A, 0x91, 0x30, +- 0xBE, 0x22, 0xC7, 0xD3, 0xEC, 0xC3, 0x09, 0x5D, 0xBF, 0xE2, 0x80, 0x3A, +- 0x7C, 0x85, 0xB4, 0xBC, 0xD1, 0xE9, 0xF0, 0x5C, 0xDE, 0x81, 0xA6, 0x38, +- 0xB8, 0x42, 0xBB, 0x86, 0xC5, 0x9D, 0xCE, 0x7C, 0x2C, 0xEE, 0xD1, 0xDA, +- 0x27, 0x48, 0x2B, 0xF5, 0xAB, 0xB9, 0xF7, 0x80, 0xD1, 0x90, 0x27, 0x90, +- 0xBD, 0x44, 0x97, 0x60, 0xCD, 0x57, 0xC0, 0x7A ++ 0xC2, 0xB1, 0x97, 0x00, 0x9A, 0xE5, 0x80, 0x6A, 0xE2, 0x51, 0x68, 0xB9, ++ 0x7A, 0x0C, 0xF2, 0xB4, 0x77, 0xED, 0x15, 0x0C, 0x4E, 0xE1, 0xDC, 0xFF, ++ 0x8E, 0xBC, 0xDE, 0xC7, 0x9A, 0x96, 0xF1, 0x47, 0x45, 0x24, 0x9D, 0x6F, ++ 0xA6, 0xF3, 0x1D, 0x0D, 0x35, 0x4C, 0x1A, 0xF3, 0x58, 0x2C, 0x6C, 0x06, ++ 0xD6, 0x22, 0x37, 0x77, 0x8C, 0x33, 0xE5, 0x07, 0x53, 0x93, 0x28, 0xCF, ++ 0x67, 0xFA, 0xC4, 0x1F, 0x1B, 0x24, 0xDB, 0x4C, 0xC5, 0x2A, 0x51, 0xA2, ++ 0x60, 0x15, 0x8C, 0x54, 0xB4, 0x30, 0xE2, 0x24, 0x47, 0x86, 0xF2, 0xF8, ++ 0x6C, 0xD6, 0x12, 0x59, 0x2C, 0x74, 0x9A, 0x37, 0xF3, 0xC4, 0xA2, 0xD5, ++ 0x4E, 0x1F, 0x77, 0xF0, 0x27, 0xCE, 0x77, 0xF8, 0x4A, 0x79, 0x03, 0xBE, ++ 0xC8, 0x06, 0x2D, 0xA7, 0xA6, 0x46, 0xF5, 0x55, 0x79, 0xD7, 0x5C, 0xC6, ++ 0x5B, 0xB1, 0x00, 0x4E, 0x7C, 0xD9, 0x11, 0x85, 0xE0, 0xB1, 0x4D, 0x2D, ++ 0x13, 0xD7, 0xAC, 0xEA, 0x64, 0xD1, 0xAC, 0x8F, 0x8D, 0x8F, 0xEA, 0x42, ++ 0x7F, 0xF9, 0xB7, 0x7D, 0x2C, 0x68, 0x49, 0x07, 0x7A, 0x74, 0xEF, 0xB4, ++ 0xC9, 0x97, 0x16, 0x5C, 0x6C, 0x6E, 0x5C, 0x09, 0x2E, 0x8E, 0x13, 0x2E, ++ 0x1A, 0x8D, 0xA6, 0x0C, 0x6E, 0x0C, 0x1C, 0x0F, 0xCC, 0xB2, 0x78, 0x8A, ++ 0x07, 0xFC, 0x5C, 0xC2, 0xF5, 0x65, 0xEC, 0xAB, 0x8B, 0x3C, 0xCA, 0x91, ++ 0x6F, 0x84, 0x7C, 0x21, 0x0E, 0xB8, 0xDA, 0x7B, 0x6C, 0xF7, 0xDF, 0xAB, ++ 0x7E, 0x15, 0xFD, 0x85, 0x0B, 0x33, 0x9B, 0x6A, 0x3A, 0xC3, 0xEF, 0x65, ++ 0x04, 0x6E, 0xB2, 0xAC, 0x98, 0xFD, 0xEB, 0x02, 0xF5, 0xC0, 0x0B, 0x5E, ++ 0xCB, 0xD4, 0x83, 0x82, 0x18, 0x1B, 
0xDA, 0xB4, 0xCD, 0xE8, 0x71, 0x6B, ++ 0x1D, 0xB5, 0x4F, 0xE9, 0xD6, 0x43, 0xA0, 0x0A, 0x14, 0xA0, 0xE7, 0x5D, ++ 0x47, 0x9D, 0x18, 0xD7 + }; + + static const unsigned char kat_RSA_SHA384[] = { +- 0x1D, 0xE3, 0x6A, 0xDD, 0x27, 0x4C, 0xC0, 0xA5, 0x27, 0xEF, 0xE6, 0x1F, +- 0xD2, 0x91, 0x68, 0x59, 0x04, 0xAE, 0xBD, 0x99, 0x63, 0x56, 0x47, 0xC7, +- 0x6F, 0x22, 0x16, 0x48, 0xD0, 0xF9, 0x18, 0xA9, 0xCA, 0xFA, 0x5D, 0x5C, +- 0xA7, 0x65, 0x52, 0x8A, 0xC8, 0x44, 0x7E, 0x86, 0x5D, 0xA9, 0xA6, 0x55, +- 0x65, 0x3E, 0xD9, 0x2D, 0x02, 0x38, 0xA8, 0x79, 0x28, 0x7F, 0xB6, 0xCF, +- 0x82, 0xDD, 0x7E, 0x55, 0xE1, 0xB1, 0xBC, 0xE2, 0x19, 0x2B, 0x30, 0xC2, +- 0x1B, 0x2B, 0xB0, 0x82, 0x46, 0xAC, 0x4B, 0xD1, 0xE2, 0x7D, 0xEB, 0x8C, +- 0xFF, 0x95, 0xE9, 0x6A, 0x1C, 0x3D, 0x4D, 0xBF, 0x8F, 0x8B, 0x9C, 0xCD, +- 0xEA, 0x85, 0xEE, 0x00, 0xDC, 0x1C, 0xA7, 0xEB, 0xD0, 0x8F, 0x99, 0xF1, +- 0x16, 0x28, 0x24, 0x64, 0x04, 0x39, 0x2D, 0x58, 0x1E, 0x37, 0xDC, 0x04, +- 0xBD, 0x31, 0xA2, 0x2F, 0xB3, 0x35, 0x56, 0xBF ++ 0x11, 0x5E, 0x63, 0xFE, 0x47, 0xAA, 0x6A, 0x84, 0xEB, 0x44, 0x9A, 0x00, ++ 0x96, 0x4A, 0xED, 0xD2, 0xA7, 0x67, 0x3A, 0x64, 0x82, 0x30, 0x61, 0x2D, ++ 0xE3, 0xF5, 0x49, 0x68, 0x5E, 0x60, 0xD2, 0x4D, 0xEF, 0xF2, 0xA4, 0xB2, ++ 0x9A, 0x81, 0x1D, 0x41, 0xA5, 0x73, 0x59, 0xEB, 0xBB, 0xC4, 0x9E, 0x2B, ++ 0xEB, 0xC3, 0xDE, 0x3A, 0xEA, 0xF5, 0xAD, 0xDA, 0x87, 0x08, 0x68, 0xCF, ++ 0x12, 0x9B, 0xC1, 0xE4, 0xA7, 0x71, 0xF8, 0xBD, 0x6B, 0x6F, 0x50, 0xF1, ++ 0xD1, 0xFF, 0xCE, 0x6C, 0xD9, 0xBE, 0xDA, 0x76, 0xF3, 0xEB, 0xAB, 0x9C, ++ 0x41, 0x6E, 0x4F, 0x35, 0x7A, 0x61, 0x27, 0xBC, 0x03, 0x3E, 0xAE, 0x3E, ++ 0x1B, 0xDD, 0xAC, 0xD9, 0x1A, 0xFF, 0xD3, 0xF5, 0x66, 0x43, 0x07, 0x76, ++ 0x8A, 0x69, 0x2D, 0x14, 0xB1, 0xBE, 0x55, 0x49, 0x90, 0x89, 0x4B, 0xC4, ++ 0x11, 0x67, 0xD5, 0x9D, 0xB0, 0xB2, 0xEE, 0x8D, 0x0A, 0x47, 0x4A, 0xD9, ++ 0x0E, 0xD1, 0x24, 0xF0, 0x30, 0x2B, 0xF2, 0x79, 0x47, 0xDB, 0x70, 0xB4, ++ 0x46, 0xF2, 0xF8, 0xB7, 0xB4, 0xF6, 0x34, 0x79, 0xA8, 0x2D, 0x3D, 0x56, ++ 0xD5, 0x9A, 0x60, 0x7A, 0x04, 0xC7, 0x66, 0x1D, 0xCD, 0x3C, 0xD5, 0x39, ++ 0x37, 0x12, 0x51, 0x5E, 0x9F, 0xF8, 0x1A, 0xAF, 0x13, 0xC1, 0x13, 0x00, ++ 0x35, 0xD5, 0x8D, 0x17, 0xE3, 0x02, 0x28, 0xD9, 0xEC, 0xDE, 0xD1, 0x2F, ++ 0x93, 0x49, 0x03, 0x11, 0x3E, 0x56, 0x9D, 0xC2, 0x31, 0xF8, 0xAF, 0x2D, ++ 0xD9, 0x99, 0xB7, 0x8A, 0xAC, 0x5A, 0x86, 0x20, 0x3A, 0x83, 0x29, 0x26, ++ 0x9D, 0x03, 0x52, 0x2B, 0x34, 0x56, 0x40, 0x16, 0x53, 0x50, 0x82, 0xC9, ++ 0xC7, 0xD5, 0x51, 0x4C, 0xED, 0xB3, 0xE2, 0xE1, 0xCF, 0xA8, 0xCE, 0xBD, ++ 0xB1, 0x48, 0xA6, 0x8A, 0x79, 0x17, 0x55, 0x11, 0xEF, 0xE8, 0x14, 0xF4, ++ 0x7E, 0x37, 0x1D, 0x96 + }; + + static const unsigned char kat_RSA_SHA512[] = { +- 0x69, 0x52, 0x1B, 0x51, 0x5E, 0x06, 0xCA, 0x9B, 0x16, 0x51, 0x5D, 0xCF, +- 0x49, 0x25, 0x4A, 0xA1, 0x6A, 0x77, 0x4C, 0x36, 0x40, 0xF8, 0xB2, 0x9A, +- 0x15, 0xEA, 0x5C, 0xE5, 0xE6, 0x82, 0xE0, 0x86, 0x82, 0x6B, 0x32, 0xF1, +- 0x04, 0xC1, 0x5A, 0x1A, 0xED, 0x1E, 0x9A, 0xB6, 0x4C, 0x54, 0x9F, 0xD8, +- 0x8D, 0xCC, 0xAC, 0x8A, 0xBB, 0x9C, 0x82, 0x3F, 0xA6, 0x53, 0x62, 0xB5, +- 0x80, 0xE2, 0xBC, 0xDD, 0x67, 0x2B, 0xD9, 0x3F, 0xE4, 0x75, 0x92, 0x6B, +- 0xAF, 0x62, 0x7C, 0x52, 0xF0, 0xEE, 0x33, 0xDF, 0x1B, 0x1D, 0x47, 0xE6, +- 0x59, 0x56, 0xA5, 0xB9, 0x5C, 0xE6, 0x77, 0x78, 0x16, 0x63, 0x84, 0x05, +- 0x6F, 0x0E, 0x2B, 0x31, 0x9D, 0xF7, 0x7F, 0xB2, 0x64, 0x71, 0xE0, 0x2D, +- 0x3E, 0x62, 0xCE, 0xB5, 0x3F, 0x88, 0xDF, 0x2D, 0xAB, 0x98, 0x65, 0x91, +- 0xDF, 0x70, 0x14, 0xA5, 0x3F, 0x36, 0xAB, 0x84 ++ 0x35, 0x6D, 0xF1, 0x9E, 0xCF, 0xB1, 0xF6, 0x0C, 0x04, 0x21, 0x17, 0xB3, ++ 0xC4, 0x9D, 0xFE, 0x62, 
0x1C, 0x1A, 0x45, 0x00, 0x2E, 0x6B, 0xB6, 0x9F, ++ 0x5C, 0xB1, 0xCB, 0xCF, 0xF9, 0x67, 0xEA, 0x62, 0x8A, 0xEB, 0x77, 0x02, ++ 0x42, 0x30, 0x88, 0xB1, 0x48, 0xDF, 0x12, 0x60, 0x6E, 0x92, 0xBB, 0x4B, ++ 0x09, 0x68, 0xD1, 0x70, 0x2B, 0x59, 0xEE, 0x57, 0x96, 0xF9, 0xEA, 0xA3, ++ 0x4C, 0xE9, 0xC9, 0xBD, 0x25, 0x34, 0x66, 0x15, 0x6C, 0xC9, 0x81, 0xD1, ++ 0x48, 0x0F, 0x33, 0x5F, 0x05, 0x4F, 0xC2, 0xC4, 0xDD, 0x09, 0x54, 0x79, ++ 0xA1, 0x57, 0x07, 0x70, 0xA0, 0x33, 0x02, 0x4D, 0x5D, 0xE9, 0x24, 0xD1, ++ 0xEF, 0xF0, 0x61, 0xD0, 0x1D, 0x41, 0xE2, 0x9B, 0x2B, 0x7C, 0xD0, 0x4E, ++ 0x55, 0xD9, 0x6D, 0xA1, 0x16, 0x9F, 0xDA, 0xC3, 0x3B, 0xF1, 0x74, 0xD1, ++ 0x99, 0xF1, 0x63, 0x57, 0xAD, 0xC7, 0x55, 0xF4, 0x97, 0x43, 0x1C, 0xED, ++ 0x1B, 0x7A, 0x32, 0xCB, 0x24, 0xA6, 0x3D, 0x93, 0x37, 0x90, 0x74, 0xEE, ++ 0xD2, 0x8D, 0x4B, 0xBC, 0x72, 0xDA, 0x25, 0x2B, 0x64, 0xE9, 0xCA, 0x69, ++ 0x36, 0xB6, 0xEC, 0x6E, 0x8F, 0x33, 0x0E, 0x74, 0x40, 0x48, 0x51, 0xE2, ++ 0x54, 0x6F, 0xAF, 0x6E, 0x36, 0x54, 0x3A, 0xEC, 0x78, 0x37, 0xE6, 0x1F, ++ 0x76, 0xA5, 0x4D, 0xA6, 0xD9, 0xB3, 0x6B, 0x17, 0x6D, 0x61, 0xFC, 0xA3, ++ 0x85, 0x4A, 0xCC, 0xDA, 0x52, 0xAC, 0x5B, 0xDA, 0x51, 0xE5, 0x7F, 0x5B, ++ 0x52, 0x8B, 0x74, 0x75, 0x99, 0x5C, 0x01, 0xFD, 0x25, 0x3E, 0xCD, 0x86, ++ 0x6F, 0x7A, 0xC0, 0xD8, 0x17, 0x6F, 0xD1, 0xD2, 0x6B, 0xAB, 0x14, 0x1F, ++ 0x3B, 0xB8, 0x15, 0x05, 0x86, 0x40, 0x36, 0xCF, 0xDA, 0x59, 0x2B, 0x9A, ++ 0xE9, 0x1E, 0x6E, 0xD3, 0x6B, 0xA1, 0x19, 0xC5, 0xE6, 0x3F, 0xE9, 0x2E, ++ 0x43, 0xA8, 0x34, 0x0A }; +-static const unsigned char kat_RSA_X931_SHA1[] = { +- 0x86, 0xB4, 0x18, 0xBA, 0xD1, 0x80, 0xB6, 0x7C, 0x42, 0x45, 0x4D, 0xDF, +- 0xE9, 0x2D, 0xE1, 0x83, 0x5F, 0xB5, 0x2F, 0xC9, 0xCD, 0xC4, 0xB2, 0x75, +- 0x80, 0xA4, 0xF1, 0x4A, 0xE7, 0x83, 0x12, 0x1E, 0x1E, 0x14, 0xB8, 0xAC, +- 0x35, 0xE2, 0xAA, 0x0B, 0x5C, 0xF8, 0x38, 0x4D, 0x04, 0xEE, 0xA9, 0x97, +- 0x70, 0xFB, 0x5E, 0xE7, 0xB7, 0xE3, 0x62, 0x23, 0x4B, 0x38, 0xBE, 0xD6, +- 0x53, 0x15, 0xF7, 0xDF, 0x87, 0xB4, 0x0E, 0xCC, 0xB1, 0x1A, 0x11, 0x19, +- 0xEE, 0x51, 0xCC, 0x92, 0xDD, 0xBC, 0x63, 0x29, 0x63, 0x0C, 0x59, 0xD7, +- 0x6F, 0x4C, 0x3C, 0x37, 0x5B, 0x37, 0x03, 0x61, 0x7D, 0x24, 0x1C, 0x99, +- 0x48, 0xAF, 0x82, 0xFE, 0x32, 0x41, 0x9B, 0xB2, 0xDB, 0xEA, 0xED, 0x76, +- 0x8E, 0x6E, 0xCA, 0x7E, 0x4E, 0x14, 0xBA, 0x30, 0x84, 0x1C, 0xB3, 0x67, +- 0xA3, 0x29, 0x80, 0x70, 0x54, 0x68, 0x7D, 0x49 +-}; +static int fips_rsa_encrypt_test(RSA *rsa, const unsigned char *plaintext, int ptlen) + { + unsigned char *ctbuf = NULL, *ptbuf = NULL; + int ret = 0; + int len; -+ + +-static const unsigned char kat_RSA_X931_SHA256[] = { +- 0x7E, 0xA2, 0x77, 0xFE, 0xB8, 0x54, 0x8A, 0xC7, 0x7F, 0x64, 0x54, 0x89, +- 0xE5, 0x52, 0x15, 0x8E, 0x52, 0x96, 0x4E, 0xA6, 0x58, 0x92, 0x1C, 0xDD, +- 0xEA, 0xA2, 0x2D, 0x5C, 0xD1, 0x62, 0x00, 0x49, 0x05, 0x95, 0x73, 0xCF, +- 0x16, 0x76, 0x68, 0xF6, 0xC6, 0x5E, 0x80, 0xB8, 0xB8, 0x7B, 0xC8, 0x9B, +- 0xC6, 0x53, 0x88, 0x26, 0x20, 0x88, 0x73, 0xB6, 0x13, 0xB8, 0xF0, 0x4B, +- 0x00, 0x85, 0xF3, 0xDD, 0x07, 0x50, 0xEB, 0x20, 0xC4, 0x38, 0x0E, 0x98, +- 0xAD, 0x4E, 0x49, 0x2C, 0xD7, 0x65, 0xA5, 0x19, 0x0E, 0x59, 0x01, 0xEC, +- 0x7E, 0x75, 0x89, 0x69, 0x2E, 0x63, 0x76, 0x85, 0x46, 0x8D, 0xA0, 0x8C, +- 0x33, 0x1D, 0x82, 0x8C, 0x03, 0xEA, 0x69, 0x88, 0x35, 0xA1, 0x42, 0xBD, +- 0x21, 0xED, 0x8D, 0xBC, 0xBC, 0xDB, 0x30, 0xFF, 0x86, 0xF0, 0x5B, 0xDC, +- 0xE3, 0xE2, 0xE8, 0x0A, 0x0A, 0x29, 0x94, 0x80 +-}; + ctbuf = OPENSSL_malloc(RSA_size(rsa)); + if (!ctbuf) + goto err; -+ + +-static const unsigned char kat_RSA_X931_SHA384[] = { +- 0x5C, 0x7D, 0x96, 0x35, 0xEC, 0x7E, 
0x11, 0x38, 0xBB, 0x7B, 0xEC, 0x7B, +- 0xF2, 0x82, 0x8E, 0x99, 0xBD, 0xEF, 0xD8, 0xAE, 0xD7, 0x39, 0x37, 0xCB, +- 0xE6, 0x4F, 0x5E, 0x0A, 0x13, 0xE4, 0x2E, 0x40, 0xB9, 0xBE, 0x2E, 0xE3, +- 0xEF, 0x78, 0x83, 0x18, 0x44, 0x35, 0x9C, 0x8E, 0xD7, 0x4A, 0x63, 0xF6, +- 0x57, 0xC2, 0xB0, 0x08, 0x51, 0x73, 0xCF, 0xCA, 0x99, 0x66, 0xEE, 0x31, +- 0xD8, 0x69, 0xE9, 0xAB, 0x13, 0x27, 0x7B, 0x41, 0x1E, 0x6D, 0x8D, 0xF1, +- 0x3E, 0x9C, 0x35, 0x95, 0x58, 0xDD, 0x2B, 0xD5, 0xA0, 0x60, 0x41, 0x79, +- 0x24, 0x22, 0xE4, 0xB7, 0xBF, 0x47, 0x53, 0xF6, 0x34, 0xD5, 0x7C, 0xFF, +- 0x0E, 0x09, 0xEE, 0x2E, 0xE2, 0x37, 0xB9, 0xDE, 0xC5, 0x12, 0x44, 0x35, +- 0xEF, 0x01, 0xE6, 0x5E, 0x39, 0x31, 0x2D, 0x71, 0xA5, 0xDC, 0xC6, 0x6D, +- 0xE2, 0xCD, 0x85, 0xDB, 0x73, 0x82, 0x65, 0x28 +-}; + len = RSA_public_encrypt(ptlen, plaintext, ctbuf, rsa, RSA_PKCS1_PADDING); + if (len <= 0) + goto err; + /* Check ciphertext doesn't match plaintext */ + if (len >= ptlen && !memcmp(plaintext, ctbuf, ptlen)) + goto err; -+ + +-static const unsigned char kat_RSA_X931_SHA512[] = { +- 0xA6, 0x65, 0xA2, 0x77, 0x4F, 0xB3, 0x86, 0xCB, 0x64, 0x3A, 0xC1, 0x63, +- 0xFC, 0xA1, 0xAA, 0xCB, 0x9B, 0x79, 0xDD, 0x4B, 0xE1, 0xD9, 0xDA, 0xAC, +- 0xE7, 0x47, 0x09, 0xB2, 0x11, 0x4B, 0x8A, 0xAA, 0x05, 0x9E, 0x77, 0xD7, +- 0x3A, 0xBD, 0x5E, 0x53, 0x09, 0x4A, 0xE6, 0x0F, 0x5E, 0xF9, 0x14, 0x28, +- 0xA0, 0x99, 0x74, 0x64, 0x70, 0x4E, 0xF2, 0xE3, 0xFA, 0xC7, 0xF8, 0xC5, +- 0x6E, 0x2B, 0x79, 0x96, 0x0D, 0x0C, 0xC8, 0x10, 0x34, 0x53, 0xD2, 0xAF, +- 0x17, 0x0E, 0xE0, 0xBF, 0x79, 0xF6, 0x04, 0x72, 0x10, 0xE0, 0xF6, 0xD0, +- 0xCE, 0x8A, 0x6F, 0xA1, 0x95, 0x89, 0xBF, 0x58, 0x8F, 0x46, 0x5F, 0x09, +- 0x9F, 0x09, 0xCA, 0x84, 0x15, 0x85, 0xE0, 0xED, 0x04, 0x2D, 0xFB, 0x7C, +- 0x36, 0x35, 0x21, 0x31, 0xC3, 0xFD, 0x92, 0x42, 0x11, 0x30, 0x71, 0x1B, +- 0x60, 0x83, 0x18, 0x88, 0xA3, 0xF5, 0x59, 0xC3 +-}; + ptbuf = OPENSSL_malloc(RSA_size(rsa)); + if (!ptbuf) + goto err; @@ -431,7 +1037,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e + goto err; + + ret = 1; -+ + + err: + if (ctbuf) + OPENSSL_free(ctbuf); @@ -442,7 +1048,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e int FIPS_selftest_rsa() { -@@ -353,7 +389,7 @@ int FIPS_selftest_rsa() +@@ -353,7 +487,7 @@ int FIPS_selftest_rsa() if ((pk=EVP_PKEY_new()) == NULL) goto err; @@ -451,13 +1057,35 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1, kat_RSA_SHA1, sizeof(kat_RSA_SHA1), -@@ -430,13 +466,15 @@ int FIPS_selftest_rsa() - "RSA SHA512 X931")) +@@ -407,36 +541,15 @@ int FIPS_selftest_rsa() + "RSA SHA512 PSS")) goto err; +- +- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1, +- kat_RSA_X931_SHA1, sizeof(kat_RSA_X931_SHA1), +- EVP_sha1(), EVP_MD_CTX_FLAG_PAD_X931, +- "RSA SHA1 X931")) +- goto err; +- /* NB: SHA224 not supported in X9.31 */ +- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1, +- kat_RSA_X931_SHA256, sizeof(kat_RSA_X931_SHA256), +- EVP_sha256(), EVP_MD_CTX_FLAG_PAD_X931, +- "RSA SHA256 X931")) +- goto err; +- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1, +- kat_RSA_X931_SHA384, sizeof(kat_RSA_X931_SHA384), +- EVP_sha384(), EVP_MD_CTX_FLAG_PAD_X931, +- "RSA SHA384 X931")) +- goto err; +- if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1, +- kat_RSA_X931_SHA512, sizeof(kat_RSA_X931_SHA512), +- EVP_sha512(), EVP_MD_CTX_FLAG_PAD_X931, +- "RSA SHA512 X931")) + if 
(!fips_rsa_encrypt_test(key, kat_tbs, sizeof(kat_tbs) - 1)) -+ goto err; + goto err; +- ret = 1; err: @@ -796,7 +1424,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa * All rights reserved. * * This package is an SSL implementation written -@@ -165,6 +166,222 @@ int RSA_generate_key_ex(RSA *rsa, int bi +@@ -165,6 +166,236 @@ int RSA_generate_key_ex(RSA *rsa, int bi return rsa_builtin_keygen(rsa, bits, e_value, cb); } @@ -819,7 +1447,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa + return 0; + } + -+ if (bits != 2048 && bits != 3072) ++ if ((pbits & 0xFF) || (getenv("OPENSSL_ENFORCE_MODULUS_BITS") && bits != 2048 && bits != 3072)) + { + FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN, FIPS_R_INVALID_KEY_LENGTH); + return 0; @@ -866,6 +1494,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa + if (!BN_is_zero(rsa->p) && !BN_is_zero(rsa->q)) + test = 1; + ++retry: + /* generate p and q */ + for (i = 0; i < 5 * pbits; i++) + { @@ -958,7 +1587,18 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa + /* calculate d */ + if (!BN_sub(r1,rsa->p,BN_value_one())) goto err; /* p-1 */ + if (!BN_sub(r2,rsa->q,BN_value_one())) goto err; /* q-1 */ -+ if (!BN_mul(r0,r1,r2,ctx)) goto err; /* (p-1)(q-1) */ ++ ++ if (!BN_gcd(r0, r1, r2, ctx)) goto err; ++ if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) ++ { ++ pr0 = &local_r0; ++ BN_with_flags(pr0, r0, BN_FLG_CONSTTIME); ++ } ++ else ++ pr0 = r0; ++ if (!BN_div(r0, NULL, r1, pr0, ctx)) goto err; ++ if (!BN_mul(r0,r0,r2,ctx)) goto err; /* lcm(p-1, q-1) */ ++ + if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) + { + pr0 = &local_r0; @@ -968,6 +1608,8 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa + pr0 = r0; + if (!BN_mod_inverse(rsa->d,rsa->e,pr0,ctx)) goto err; /* d */ + ++ if (BN_num_bits(rsa->d) < pbits) goto retry; /* d is too small */ ++ + /* set up d for correct BN_FLG_CONSTTIME flag */ + if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME)) + { @@ -1019,7 +1661,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa static int rsa_builtin_keygen(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb) { BIGNUM *r0=NULL,*r1=NULL,*r2=NULL,*r3=NULL,*tmp; -@@ -176,17 +393,7 @@ static int rsa_builtin_keygen(RSA *rsa, +@@ -176,17 +407,12 @@ static int rsa_builtin_keygen(RSA *rsa, #ifdef OPENSSL_FIPS if (FIPS_module_mode()) { @@ -1029,16 +1671,16 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa - return 0; - } - -- if (bits < OPENSSL_RSA_FIPS_MIN_MODULUS_BITS) -- { -- FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN,FIPS_R_KEY_TOO_SHORT); -- return 0; -- } + if (bits < OPENSSL_RSA_FIPS_MIN_MODULUS_BITS) + { + FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN,FIPS_R_KEY_TOO_SHORT); + return 0; + } + return FIPS_rsa_builtin_keygen(rsa, bits, e_value, cb); } #endif -@@ -301,17 +508,6 @@ static int rsa_builtin_keygen(RSA *rsa, +@@ -301,17 +527,6 @@ static int rsa_builtin_keygen(RSA *rsa, p = rsa->p; if (!BN_mod_inverse(rsa->iqmp,rsa->q,p,ctx)) goto err; diff --git a/SOURCES/openssl-1.0.1e-ppc-asm-update.patch b/SOURCES/openssl-1.0.1e-ppc-asm-update.patch new file mode 100644 index 0000000..91efede --- /dev/null +++ b/SOURCES/openssl-1.0.1e-ppc-asm-update.patch @@ -0,0 +1,6677 @@ +diff --git a/Configure b/Configure +index 9c803dc..5a5c2d8 100755 +--- a/Configure ++++ b/Configure +@@ -139,8 +139,8 @@ my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes + my 
$armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void"; + my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; + my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; +-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; +-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::"; ++my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:"; ++my $ppc32_asm=$ppc64_asm; + my $no_asm=":::::::::::::::void"; + + # As for $BSDthreads. Idea is to maintain "collective" set of flags, +@@ -357,6 +357,7 @@ my %table=( + #### + "linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", ++"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::", + "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +@@ -462,8 +463,8 @@ my %table=( + + #### IBM's AIX. + "aix3-cc", "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::", +-"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32", +-"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:${ppc64_asm}:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64", ++"aix-gcc", "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:$ppc32_asm:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32", ++"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:$ppc64_asm:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64", + # Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE + # at build time. $OBJECT_MODE is respected at ./config stage! 
+ "aix-cc", "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded -D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-q32 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32", +@@ -1525,7 +1526,7 @@ else { + $wp_obj="wp_block.o"; + } + $cmll_obj=$cmll_enc unless ($cmll_obj =~ /.o$/); +-if ($modes_obj =~ /ghash/) ++if ($modes_obj =~ /ghash\-/) + { + $cflags.=" -DGHASH_ASM"; + } +diff --git a/config b/config +index 88b9bc6..8b80802 100755 +--- a/config ++++ b/config +@@ -587,13 +587,20 @@ case "$GUESSOS" in + fi + ;; + ppc64-*-linux2) +- echo "WARNING! If you wish to build 64-bit library, then you have to" +- echo " invoke './Configure linux-ppc64' *manually*." +- if [ "$TEST" = "false" -a -t 1 ]; then +- echo " You have about 5 seconds to press Ctrl-C to abort." +- (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 ++ if [ -z "$KERNEL_BITS" ]; then ++ echo "WARNING! If you wish to build 64-bit library, then you have to" ++ echo " invoke './Configure linux-ppc64' *manually*." ++ if [ "$TEST" = "false" -a -t 1 ]; then ++ echo " You have about 5 seconds to press Ctrl-C to abort." ++ (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1 ++ fi ++ fi ++ if [ "$KERNEL_BITS" = "64" ]; then ++ OUT="linux-ppc64" ++ else ++ OUT="linux-ppc" ++ (echo "__LP64__" | gcc -E -x c - 2>/dev/null | grep "^__LP64__" 2>&1 > /dev/null) || options="$options -m32" + fi +- OUT="linux-ppc" + ;; + ppc-*-linux2) OUT="linux-ppc" ;; + ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;; +diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile +index 45ede0a..847f4ee 100644 +--- a/crypto/aes/Makefile ++++ b/crypto/aes/Makefile +@@ -71,6 +71,10 @@ aes-sparcv9.s: asm/aes-sparcv9.pl + + aes-ppc.s: asm/aes-ppc.pl + $(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@ ++vpaes-ppc.s: asm/vpaes-ppc.pl ++ $(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@ ++aesp8-ppc.s: asm/aesp8-ppc.pl ++ $(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@ + + aes-parisc.s: asm/aes-parisc.pl + $(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@ +diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl +index 7c52cbe..7a99fc3 100644 +--- a/crypto/aes/asm/aes-ppc.pl ++++ b/crypto/aes/asm/aes-ppc.pl +@@ -45,6 +45,8 @@ if ($flavour =~ /64/) { + $PUSH ="stw"; + } else { die "nonsense $flavour"; } + ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; ++ + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +@@ -68,7 +70,7 @@ $key="r5"; + $Tbl0="r3"; + $Tbl1="r6"; + $Tbl2="r7"; +-$Tbl3="r2"; ++$Tbl3=$out; # stay away from "r2"; $out is offloaded to stack + + $s0="r8"; + $s1="r9"; +@@ -76,7 +78,7 @@ $s2="r10"; + $s3="r11"; + + $t0="r12"; +-$t1="r13"; ++$t1="r0"; # stay away from "r13"; + $t2="r14"; + $t3="r15"; + +@@ -100,9 +102,6 @@ $acc13="r29"; + $acc14="r30"; + $acc15="r31"; + +-# stay away from TLS pointer +-if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; } +-else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; } + $mask80=$Tbl2; + $mask1b=$Tbl3; + +@@ -337,8 +336,7 @@ $code.=<<___; + $STU $sp,-$FRAME($sp) + mflr r0 + +- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) +- $PUSH r13,`$FRAME-$SIZE_T*19`($sp) ++ $PUSH $out,`$FRAME-$SIZE_T*19`($sp) + $PUSH r14,`$FRAME-$SIZE_T*18`($sp) + $PUSH r15,`$FRAME-$SIZE_T*17`($sp) + $PUSH r16,`$FRAME-$SIZE_T*16`($sp) +@@ -365,16 +363,61 @@ $code.=<<___; + bne Lenc_unaligned + + Lenc_unaligned_ok: ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + lwz $s0,0($inp) + lwz $s1,4($inp) + lwz $s2,8($inp) + lwz $s3,12($inp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $t0,0($inp) ++ lwz $t1,4($inp) ++ lwz $t2,8($inp) ++ lwz $t3,12($inp) ++ rotlwi $s0,$t0,8 ++ rotlwi $s1,$t1,8 ++ rotlwi $s2,$t2,8 ++ rotlwi $s3,$t3,8 ++ rlwimi $s0,$t0,24,0,7 ++ rlwimi $s1,$t1,24,0,7 ++ rlwimi $s2,$t2,24,0,7 ++ rlwimi $s3,$t3,24,0,7 ++ rlwimi $s0,$t0,24,16,23 ++ rlwimi $s1,$t1,24,16,23 ++ rlwimi $s2,$t2,24,16,23 ++ rlwimi $s3,$t3,24,16,23 ++___ ++$code.=<<___; + bl LAES_Te + bl Lppc_AES_encrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ rotlwi $t0,$s0,8 ++ rotlwi $t1,$s1,8 ++ rotlwi $t2,$s2,8 ++ rotlwi $t3,$s3,8 ++ rlwimi $t0,$s0,24,0,7 ++ rlwimi $t1,$s1,24,0,7 ++ rlwimi $t2,$s2,24,0,7 ++ rlwimi $t3,$s3,24,0,7 ++ rlwimi $t0,$s0,24,16,23 ++ rlwimi $t1,$s1,24,16,23 ++ rlwimi $t2,$s2,24,16,23 ++ rlwimi $t3,$s3,24,16,23 ++ stw $t0,0($out) ++ stw $t1,4($out) ++ stw $t2,8($out) ++ stw $t3,12($out) ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + stw $s0,0($out) + stw $s1,4($out) + stw $s2,8($out) + stw $s3,12($out) ++___ ++$code.=<<___; + b Lenc_done + + Lenc_unaligned: +@@ -417,6 +460,7 @@ Lenc_xpage: + + bl LAES_Te + bl Lppc_AES_encrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) + + extrwi $acc00,$s0,8,0 + extrwi $acc01,$s0,8,8 +@@ -449,8 +493,6 @@ Lenc_xpage: + + Lenc_done: + $POP r0,`$FRAME+$LRSAVE`($sp) +- $POP $toc,`$FRAME-$SIZE_T*20`($sp) +- $POP r13,`$FRAME-$SIZE_T*19`($sp) + $POP r14,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) +@@ -764,6 +806,7 @@ Lenc_compact_done: + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .AES_encrypt,.-.AES_encrypt + + .globl .AES_decrypt + .align 7 +@@ -771,8 +814,7 @@ Lenc_compact_done: + $STU $sp,-$FRAME($sp) + mflr r0 + +- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) +- $PUSH r13,`$FRAME-$SIZE_T*19`($sp) ++ $PUSH $out,`$FRAME-$SIZE_T*19`($sp) + $PUSH r14,`$FRAME-$SIZE_T*18`($sp) + $PUSH r15,`$FRAME-$SIZE_T*17`($sp) + $PUSH r16,`$FRAME-$SIZE_T*16`($sp) +@@ -799,16 +841,61 @@ Lenc_compact_done: + bne Ldec_unaligned + + Ldec_unaligned_ok: ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + lwz $s0,0($inp) + lwz $s1,4($inp) + lwz $s2,8($inp) + lwz $s3,12($inp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $t0,0($inp) ++ lwz $t1,4($inp) ++ lwz $t2,8($inp) ++ lwz $t3,12($inp) ++ rotlwi $s0,$t0,8 ++ rotlwi 
$s1,$t1,8 ++ rotlwi $s2,$t2,8 ++ rotlwi $s3,$t3,8 ++ rlwimi $s0,$t0,24,0,7 ++ rlwimi $s1,$t1,24,0,7 ++ rlwimi $s2,$t2,24,0,7 ++ rlwimi $s3,$t3,24,0,7 ++ rlwimi $s0,$t0,24,16,23 ++ rlwimi $s1,$t1,24,16,23 ++ rlwimi $s2,$t2,24,16,23 ++ rlwimi $s3,$t3,24,16,23 ++___ ++$code.=<<___; + bl LAES_Td + bl Lppc_AES_decrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ rotlwi $t0,$s0,8 ++ rotlwi $t1,$s1,8 ++ rotlwi $t2,$s2,8 ++ rotlwi $t3,$s3,8 ++ rlwimi $t0,$s0,24,0,7 ++ rlwimi $t1,$s1,24,0,7 ++ rlwimi $t2,$s2,24,0,7 ++ rlwimi $t3,$s3,24,0,7 ++ rlwimi $t0,$s0,24,16,23 ++ rlwimi $t1,$s1,24,16,23 ++ rlwimi $t2,$s2,24,16,23 ++ rlwimi $t3,$s3,24,16,23 ++ stw $t0,0($out) ++ stw $t1,4($out) ++ stw $t2,8($out) ++ stw $t3,12($out) ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); + stw $s0,0($out) + stw $s1,4($out) + stw $s2,8($out) + stw $s3,12($out) ++___ ++$code.=<<___; + b Ldec_done + + Ldec_unaligned: +@@ -851,6 +938,7 @@ Ldec_xpage: + + bl LAES_Td + bl Lppc_AES_decrypt_compact ++ $POP $out,`$FRAME-$SIZE_T*19`($sp) + + extrwi $acc00,$s0,8,0 + extrwi $acc01,$s0,8,8 +@@ -883,8 +971,6 @@ Ldec_xpage: + + Ldec_done: + $POP r0,`$FRAME+$LRSAVE`($sp) +- $POP $toc,`$FRAME-$SIZE_T*20`($sp) +- $POP r13,`$FRAME-$SIZE_T*19`($sp) + $POP r14,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) +@@ -1355,6 +1441,7 @@ Ldec_compact_done: + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .AES_decrypt,.-.AES_decrypt + + .asciz "AES for PPC, CRYPTOGAMS by " + .align 7 +diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl +new file mode 100755 +index 0000000..3ee8979 +--- /dev/null ++++ b/crypto/aes/asm/aesp8-ppc.pl +@@ -0,0 +1,1940 @@ ++#!/usr/bin/env perl ++# ++# ==================================================================== ++# Written by Andy Polyakov for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++# ++# This module implements support for AES instructions as per PowerISA ++# specification version 2.07, first implemented by POWER8 processor. ++# The module is endian-agnostic in sense that it supports both big- ++# and little-endian cases. Data alignment in parallelizable modes is ++# handled with VSX loads and stores, which implies MSR.VSX flag being ++# set. It should also be noted that ISA specification doesn't prohibit ++# alignment exceptions for these instructions on page boundaries. ++# Initially alignment was handled in pure AltiVec/VMX way [when data ++# is aligned programmatically, which in turn guarantees exception- ++# free execution], but it turned to hamper performance when vcipher ++# instructions are interleaved. It's reckoned that eventual ++# misalignment penalties at page boundaries are in average lower ++# than additional overhead in pure AltiVec approach. ++ ++$flavour = shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T =8; ++ $LRSAVE =2*$SIZE_T; ++ $STU ="stdu"; ++ $POP ="ld"; ++ $PUSH ="std"; ++ $UCMP ="cmpld"; ++ $SHL ="sldi"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T =4; ++ $LRSAVE =$SIZE_T; ++ $STU ="stwu"; ++ $POP ="lwz"; ++ $PUSH ="stw"; ++ $UCMP ="cmplw"; ++ $SHL ="slwi"; ++} else { die "nonsense $flavour"; } ++ ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; ++ ++$FRAME=8*$SIZE_T; ++$prefix="aes_p8"; ++ ++$sp="r1"; ++$vrsave="r12"; ++ ++######################################################################### ++{{{ # Key setup procedures # ++my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); ++my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); ++my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); ++ ++$code.=<<___; ++.machine "any" ++ ++.text ++ ++.align 7 ++rcon: ++.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev ++.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev ++.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev ++.long 0,0,0,0 ?asis ++Lconsts: ++ mflr r0 ++ bcl 20,31,\$+4 ++ mflr $ptr #vvvvv "distance between . and rcon ++ addi $ptr,$ptr,-0x48 ++ mtlr r0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.asciz "AES for PowerISA 2.07, CRYPTOGAMS by " ++ ++.globl .${prefix}_set_encrypt_key ++.align 5 ++.${prefix}_set_encrypt_key: ++Lset_encrypt_key: ++ mflr r11 ++ $PUSH r11,$LRSAVE($sp) ++ ++ li $ptr,-1 ++ ${UCMP}i $inp,0 ++ beq- Lenc_key_abort # if ($inp==0) return -1; ++ ${UCMP}i $out,0 ++ beq- Lenc_key_abort # if ($out==0) return -1; ++ li $ptr,-2 ++ cmpwi $bits,128 ++ blt- Lenc_key_abort ++ cmpwi $bits,256 ++ bgt- Lenc_key_abort ++ andi. r0,$bits,0x3f ++ bne- Lenc_key_abort ++ ++ lis r0,0xfff0 ++ mfspr $vrsave,256 ++ mtspr 256,r0 ++ ++ bl Lconsts ++ mtlr r11 ++ ++ neg r9,$inp ++ lvx $in0,0,$inp ++ addi $inp,$inp,15 # 15 is not typo ++ lvsr $key,0,r9 # borrow $key ++ li r8,0x20 ++ cmpwi $bits,192 ++ lvx $in1,0,$inp ++ le?vspltisb $mask,0x0f # borrow $mask ++ lvx $rcon,0,$ptr ++ le?vxor $key,$key,$mask # adjust for byte swap ++ lvx $mask,r8,$ptr ++ addi $ptr,$ptr,0x10 ++ vperm $in0,$in0,$in1,$key # align [and byte swap in LE] ++ li $cnt,8 ++ vxor $zero,$zero,$zero ++ mtctr $cnt ++ ++ ?lvsr $outperm,0,$out ++ vspltisb $outmask,-1 ++ lvx $outhead,0,$out ++ ?vperm $outmask,$zero,$outmask,$outperm ++ ++ blt Loop128 ++ addi $inp,$inp,8 ++ beq L192 ++ addi $inp,$inp,8 ++ b L256 ++ ++.align 4 ++Loop128: ++ vperm $key,$in0,$in0,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in0,$in0,$key ++ bdnz Loop128 ++ ++ lvx $rcon,0,$ptr # last two round keys ++ ++ vperm $key,$in0,$in0,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in0,$in0,$key ++ ++ vperm $key,$in0,$in0,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon 
++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vxor $in0,$in0,$key ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ ++ addi $inp,$out,15 # 15 is not typo ++ addi $out,$out,0x50 ++ ++ li $rounds,10 ++ b Ldone ++ ++.align 4 ++L192: ++ lvx $tmp,0,$inp ++ li $cnt,4 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] ++ vspltisb $key,8 # borrow $key ++ mtctr $cnt ++ vsububm $mask,$mask,$key # adjust the mask ++ ++Loop192: ++ vperm $key,$in1,$in1,$mask # roate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vcipherlast $key,$key,$rcon ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ ++ vsldoi $stage,$zero,$in1,8 ++ vspltw $tmp,$in0,3 ++ vxor $tmp,$tmp,$in1 ++ vsldoi $in1,$zero,$in1,12 # >>32 ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in1,$in1,$tmp ++ vxor $in0,$in0,$key ++ vxor $in1,$in1,$key ++ vsldoi $stage,$stage,$in0,8 ++ ++ vperm $key,$in1,$in1,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$stage,$stage,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vsldoi $stage,$in0,$in1,8 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vperm $outtail,$stage,$stage,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vspltw $tmp,$in0,3 ++ vxor $tmp,$tmp,$in1 ++ vsldoi $in1,$zero,$in1,12 # >>32 ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in1,$in1,$tmp ++ vxor $in0,$in0,$key ++ vxor $in1,$in1,$key ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $inp,$out,15 # 15 is not typo ++ addi $out,$out,16 ++ bdnz Loop192 ++ ++ li $rounds,12 ++ addi $out,$out,0x20 ++ b Ldone ++ ++.align 4 ++L256: ++ lvx $tmp,0,$inp ++ li $cnt,7 ++ li $rounds,14 ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] ++ mtctr $cnt ++ ++Loop256: ++ vperm $key,$in1,$in1,$mask # rotate-n-splat ++ vsldoi $tmp,$zero,$in0,12 # >>32 ++ vperm $outtail,$in1,$in1,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ vcipherlast $key,$key,$rcon ++ stvx $stage,0,$out ++ addi $out,$out,16 ++ ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in0,$in0,$tmp ++ vadduwm $rcon,$rcon,$rcon ++ vxor $in0,$in0,$key ++ vperm $outtail,$in0,$in0,$outperm # rotate ++ vsel $stage,$outhead,$outtail,$outmask ++ vmr $outhead,$outtail ++ stvx $stage,0,$out ++ addi $inp,$out,15 # 15 is not typo ++ addi $out,$out,16 ++ bdz Ldone ++ ++ vspltw $key,$in0,3 # just splat ++ vsldoi $tmp,$zero,$in1,12 # >>32 ++ vsbox $key,$key ++ ++ vxor $in1,$in1,$tmp ++ vsldoi $tmp,$zero,$tmp,12 # >>32 ++ vxor $in1,$in1,$tmp ++ vsldoi 
$tmp,$zero,$tmp,12 # >>32 ++ vxor $in1,$in1,$tmp ++ ++ vxor $in1,$in1,$key ++ b Loop256 ++ ++.align 4 ++Ldone: ++ lvx $in1,0,$inp # redundant in aligned case ++ vsel $in1,$outhead,$in1,$outmask ++ stvx $in1,0,$inp ++ li $ptr,0 ++ mtspr 256,$vrsave ++ stw $rounds,0($out) ++ ++Lenc_key_abort: ++ mr r3,$ptr ++ blr ++ .long 0 ++ .byte 0,12,0x14,1,0,0,3,0 ++ .long 0 ++.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key ++ ++.globl .${prefix}_set_decrypt_key ++.align 5 ++.${prefix}_set_decrypt_key: ++ $STU $sp,-$FRAME($sp) ++ mflr r10 ++ $PUSH r10,$FRAME+$LRSAVE($sp) ++ bl Lset_encrypt_key ++ mtlr r10 ++ ++ cmpwi r3,0 ++ bne- Ldec_key_abort ++ ++ slwi $cnt,$rounds,4 ++ subi $inp,$out,240 # first round key ++ srwi $rounds,$rounds,1 ++ add $out,$inp,$cnt # last round key ++ mtctr $rounds ++ ++Ldeckey: ++ lwz r0, 0($inp) ++ lwz r6, 4($inp) ++ lwz r7, 8($inp) ++ lwz r8, 12($inp) ++ addi $inp,$inp,16 ++ lwz r9, 0($out) ++ lwz r10,4($out) ++ lwz r11,8($out) ++ lwz r12,12($out) ++ stw r0, 0($out) ++ stw r6, 4($out) ++ stw r7, 8($out) ++ stw r8, 12($out) ++ subi $out,$out,16 ++ stw r9, -16($inp) ++ stw r10,-12($inp) ++ stw r11,-8($inp) ++ stw r12,-4($inp) ++ bdnz Ldeckey ++ ++ xor r3,r3,r3 # return value ++Ldec_key_abort: ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,4,1,0x80,0,3,0 ++ .long 0 ++.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key ++___ ++}}} ++######################################################################### ++{{{ # Single block en- and decrypt procedures # ++sub gen_block () { ++my $dir = shift; ++my $n = $dir eq "de" ? "n" : ""; ++my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); ++ ++$code.=<<___; ++.globl .${prefix}_${dir}crypt ++.align 5 ++.${prefix}_${dir}crypt: ++ lwz $rounds,240($key) ++ lis r0,0xfc00 ++ mfspr $vrsave,256 ++ li $idx,15 # 15 is not typo ++ mtspr 256,r0 ++ ++ lvx v0,0,$inp ++ neg r11,$out ++ lvx v1,$idx,$inp ++ lvsl v2,0,$inp # inpperm ++ le?vspltisb v4,0x0f ++ ?lvsl v3,0,r11 # outperm ++ le?vxor v2,v2,v4 ++ li $idx,16 ++ vperm v0,v0,v1,v2 # align [and byte swap in LE] ++ lvx v1,0,$key ++ ?lvsl v5,0,$key # keyperm ++ srwi $rounds,$rounds,1 ++ lvx v2,$idx,$key ++ addi $idx,$idx,16 ++ subi $rounds,$rounds,1 ++ ?vperm v1,v1,v2,v5 # align round key ++ ++ vxor v0,v0,v1 ++ lvx v1,$idx,$key ++ addi $idx,$idx,16 ++ mtctr $rounds ++ ++Loop_${dir}c: ++ ?vperm v2,v2,v1,v5 ++ v${n}cipher v0,v0,v2 ++ lvx v2,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm v1,v1,v2,v5 ++ v${n}cipher v0,v0,v1 ++ lvx v1,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_${dir}c ++ ++ ?vperm v2,v2,v1,v5 ++ v${n}cipher v0,v0,v2 ++ lvx v2,$idx,$key ++ ?vperm v1,v1,v2,v5 ++ v${n}cipherlast v0,v0,v1 ++ ++ vspltisb v2,-1 ++ vxor v1,v1,v1 ++ li $idx,15 # 15 is not typo ++ ?vperm v2,v1,v2,v3 # outmask ++ le?vxor v3,v3,v4 ++ lvx v1,0,$out # outhead ++ vperm v0,v0,v0,v3 # rotate [and byte swap in LE] ++ vsel v1,v1,v0,v2 ++ lvx v4,$idx,$out ++ stvx v1,0,$out ++ vsel v0,v0,v4,v2 ++ stvx v0,$idx,$out ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,3,0 ++ .long 0 ++.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt ++___ ++} ++&gen_block("en"); ++&gen_block("de"); ++}}} ++######################################################################### ++{{{ # CBC en- and decrypt procedures # ++my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); ++my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); ++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= ++ map("v$_",(4..10)); ++$code.=<<___; ++.globl .${prefix}_cbc_encrypt ++.align 5 
++.${prefix}_cbc_encrypt: ++ ${UCMP}i $len,16 ++ bltlr- ++ ++ cmpwi $enc,0 # test direction ++ lis r0,0xffe0 ++ mfspr $vrsave,256 ++ mtspr 256,r0 ++ ++ li $idx,15 ++ vxor $rndkey0,$rndkey0,$rndkey0 ++ le?vspltisb $tmp,0x0f ++ ++ lvx $ivec,0,$ivp # load [unaligned] iv ++ lvsl $inpperm,0,$ivp ++ lvx $inptail,$idx,$ivp ++ le?vxor $inpperm,$inpperm,$tmp ++ vperm $ivec,$ivec,$inptail,$inpperm ++ ++ neg r11,$inp ++ ?lvsl $keyperm,0,$key # prepare for unaligned key ++ lwz $rounds,240($key) ++ ++ lvsr $inpperm,0,r11 # prepare for unaligned load ++ lvx $inptail,0,$inp ++ addi $inp,$inp,15 # 15 is not typo ++ le?vxor $inpperm,$inpperm,$tmp ++ ++ ?lvsr $outperm,0,$out # prepare for unaligned store ++ vspltisb $outmask,-1 ++ lvx $outhead,0,$out ++ ?vperm $outmask,$rndkey0,$outmask,$outperm ++ le?vxor $outperm,$outperm,$tmp ++ ++ srwi $rounds,$rounds,1 ++ li $idx,16 ++ subi $rounds,$rounds,1 ++ beq Lcbc_dec ++ ++Lcbc_enc: ++ vmr $inout,$inptail ++ lvx $inptail,0,$inp ++ addi $inp,$inp,16 ++ mtctr $rounds ++ subi $len,$len,16 # len-=16 ++ ++ lvx $rndkey0,0,$key ++ vperm $inout,$inout,$inptail,$inpperm ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vxor $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ vxor $inout,$inout,$ivec ++ ++Loop_cbc_enc: ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vcipher $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_cbc_enc ++ ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ li $idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vcipherlast $ivec,$inout,$rndkey0 ++ ${UCMP}i $len,16 ++ ++ vperm $tmp,$ivec,$ivec,$outperm ++ vsel $inout,$outhead,$tmp,$outmask ++ vmr $outhead,$tmp ++ stvx $inout,0,$out ++ addi $out,$out,16 ++ bge Lcbc_enc ++ ++ b Lcbc_done ++ ++.align 4 ++Lcbc_dec: ++ ${UCMP}i $len,128 ++ bge _aesp8_cbc_decrypt8x ++ vmr $tmp,$inptail ++ lvx $inptail,0,$inp ++ addi $inp,$inp,16 ++ mtctr $rounds ++ subi $len,$len,16 # len-=16 ++ ++ lvx $rndkey0,0,$key ++ vperm $tmp,$tmp,$inptail,$inpperm ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vxor $inout,$tmp,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ ++Loop_cbc_dec: ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vncipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vncipher $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_cbc_dec ++ ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vncipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ li $idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vncipherlast $inout,$inout,$rndkey0 ++ ${UCMP}i $len,16 ++ ++ vxor $inout,$inout,$ivec ++ vmr $ivec,$tmp ++ vperm $tmp,$inout,$inout,$outperm ++ vsel $inout,$outhead,$tmp,$outmask ++ vmr $outhead,$tmp ++ stvx $inout,0,$out ++ addi $out,$out,16 ++ bge Lcbc_dec ++ ++Lcbc_done: ++ addi $out,$out,-1 ++ lvx $inout,0,$out # redundant in aligned case ++ vsel $inout,$outhead,$inout,$outmask ++ stvx $inout,0,$out ++ ++ neg $enc,$ivp # write [unaligned] iv ++ li $idx,15 # 15 is not typo ++ vxor $rndkey0,$rndkey0,$rndkey0 ++ vspltisb $outmask,-1 ++ le?vspltisb $tmp,0x0f ++ ?lvsl $outperm,0,$enc ++ ?vperm $outmask,$rndkey0,$outmask,$outperm ++ le?vxor 
$outperm,$outperm,$tmp ++ lvx $outhead,0,$ivp ++ vperm $ivec,$ivec,$ivec,$outperm ++ vsel $inout,$outhead,$ivec,$outmask ++ lvx $inptail,$idx,$ivp ++ stvx $inout,0,$ivp ++ vsel $inout,$ivec,$inptail,$outmask ++ stvx $inout,$idx,$ivp ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,6,0 ++ .long 0 ++___ ++######################################################################### ++{{ # Optimized CBC decrypt procedure # ++my $key_="r11"; ++my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); ++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); ++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); ++my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys ++ # v26-v31 last 6 round keys ++my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment ++ ++$code.=<<___; ++.align 5 ++_aesp8_cbc_decrypt8x: ++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) ++ li r10,`$FRAME+8*16+15` ++ li r11,`$FRAME+8*16+31` ++ stvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ li r0,-1 ++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave ++ li $x10,0x10 ++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ li $x20,0x20 ++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ li $x30,0x30 ++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ li $x40,0x40 ++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ li $x50,0x50 ++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ li $x60,0x60 ++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ li $x70,0x70 ++ mtspr 256,r0 ++ ++ subi $rounds,$rounds,3 # -4 in total ++ subi $len,$len,128 # bias ++ ++ lvx $rndkey0,$x00,$key # load key schedule ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ lvx v31,$x00,$key ++ ?vperm $rndkey0,$rndkey0,v30,$keyperm ++ addi $key_,$sp,$FRAME+15 ++ mtctr $rounds ++ ++Load_cbc_dec_key: ++ ?vperm v24,v30,v31,$keyperm ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ stvx v24,$x00,$key_ # off-load round[1] ++ ?vperm v25,v31,v30,$keyperm ++ lvx v31,$x00,$key ++ stvx v25,$x10,$key_ # off-load round[2] ++ addi $key_,$key_,0x20 ++ bdnz Load_cbc_dec_key ++ ++ lvx v26,$x10,$key ++ ?vperm v24,v30,v31,$keyperm ++ lvx v27,$x20,$key ++ stvx v24,$x00,$key_ # off-load round[3] ++ ?vperm v25,v31,v26,$keyperm ++ lvx v28,$x30,$key ++ stvx v25,$x10,$key_ # off-load round[4] ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ ?vperm v26,v26,v27,$keyperm ++ lvx v29,$x40,$key ++ ?vperm v27,v27,v28,$keyperm ++ lvx v30,$x50,$key ++ ?vperm v28,v28,v29,$keyperm ++ lvx v31,$x60,$key ++ ?vperm v29,v29,v30,$keyperm ++ lvx $out0,$x70,$key # borrow $out0 ++ ?vperm v30,v30,v31,$keyperm ++ lvx v24,$x00,$key_ # pre-load round[1] ++ ?vperm v31,v31,$out0,$keyperm ++ lvx v25,$x10,$key_ # pre-load round[2] ++ ++ #lvx $inptail,0,$inp # "caller" already did this ++ #addi $inp,$inp,15 # 15 is not typo ++ subi $inp,$inp,15 # undo "caller" ++ ++ le?li $idx,8 ++ lvx_u $in0,$x00,$inp # load first 8 "words" ++ le?lvsl $inpperm,0,$idx ++ le?vspltisb $tmp,0x0f ++ lvx_u $in1,$x10,$inp ++ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u ++ lvx_u $in2,$x20,$inp ++ le?vperm $in0,$in0,$in0,$inpperm ++ lvx_u $in3,$x30,$inp ++ le?vperm $in1,$in1,$in1,$inpperm 
++ lvx_u $in4,$x40,$inp ++ le?vperm $in2,$in2,$in2,$inpperm ++ vxor $out0,$in0,$rndkey0 ++ lvx_u $in5,$x50,$inp ++ le?vperm $in3,$in3,$in3,$inpperm ++ vxor $out1,$in1,$rndkey0 ++ lvx_u $in6,$x60,$inp ++ le?vperm $in4,$in4,$in4,$inpperm ++ vxor $out2,$in2,$rndkey0 ++ lvx_u $in7,$x70,$inp ++ addi $inp,$inp,0x80 ++ le?vperm $in5,$in5,$in5,$inpperm ++ vxor $out3,$in3,$rndkey0 ++ le?vperm $in6,$in6,$in6,$inpperm ++ vxor $out4,$in4,$rndkey0 ++ le?vperm $in7,$in7,$in7,$inpperm ++ vxor $out5,$in5,$rndkey0 ++ vxor $out6,$in6,$rndkey0 ++ vxor $out7,$in7,$rndkey0 ++ ++ mtctr $rounds ++ b Loop_cbc_dec8x ++.align 5 ++Loop_cbc_dec8x: ++ vncipher $out0,$out0,v24 ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ lvx v24,$x20,$key_ # round[3] ++ addi $key_,$key_,0x20 ++ ++ vncipher $out0,$out0,v25 ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ lvx v25,$x10,$key_ # round[4] ++ bdnz Loop_cbc_dec8x ++ ++ subic $len,$len,128 # $len-=128 ++ vncipher $out0,$out0,v24 ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ ++ subfe. r0,r0,r0 # borrow?-1:0 ++ vncipher $out0,$out0,v25 ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ ++ and r0,r0,$len ++ vncipher $out0,$out0,v26 ++ vncipher $out1,$out1,v26 ++ vncipher $out2,$out2,v26 ++ vncipher $out3,$out3,v26 ++ vncipher $out4,$out4,v26 ++ vncipher $out5,$out5,v26 ++ vncipher $out6,$out6,v26 ++ vncipher $out7,$out7,v26 ++ ++ add $inp,$inp,r0 # $inp is adjusted in such ++ # way that at exit from the ++ # loop inX-in7 are loaded ++ # with last "words" ++ vncipher $out0,$out0,v27 ++ vncipher $out1,$out1,v27 ++ vncipher $out2,$out2,v27 ++ vncipher $out3,$out3,v27 ++ vncipher $out4,$out4,v27 ++ vncipher $out5,$out5,v27 ++ vncipher $out6,$out6,v27 ++ vncipher $out7,$out7,v27 ++ ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ vncipher $out0,$out0,v28 ++ vncipher $out1,$out1,v28 ++ vncipher $out2,$out2,v28 ++ vncipher $out3,$out3,v28 ++ vncipher $out4,$out4,v28 ++ vncipher $out5,$out5,v28 ++ vncipher $out6,$out6,v28 ++ vncipher $out7,$out7,v28 ++ lvx v24,$x00,$key_ # re-pre-load round[1] ++ ++ vncipher $out0,$out0,v29 ++ vncipher $out1,$out1,v29 ++ vncipher $out2,$out2,v29 ++ vncipher $out3,$out3,v29 ++ vncipher $out4,$out4,v29 ++ vncipher $out5,$out5,v29 ++ vncipher $out6,$out6,v29 ++ vncipher $out7,$out7,v29 ++ lvx v25,$x10,$key_ # re-pre-load round[2] ++ ++ vncipher $out0,$out0,v30 ++ vxor $ivec,$ivec,v31 # xor with last round key ++ vncipher $out1,$out1,v30 ++ vxor $in0,$in0,v31 ++ vncipher $out2,$out2,v30 ++ vxor $in1,$in1,v31 ++ vncipher $out3,$out3,v30 ++ vxor $in2,$in2,v31 ++ vncipher $out4,$out4,v30 ++ vxor $in3,$in3,v31 ++ vncipher $out5,$out5,v30 ++ vxor $in4,$in4,v31 ++ vncipher $out6,$out6,v30 ++ vxor $in5,$in5,v31 ++ vncipher $out7,$out7,v30 ++ vxor $in6,$in6,v31 ++ ++ vncipherlast $out0,$out0,$ivec ++ vncipherlast $out1,$out1,$in0 ++ lvx_u $in0,$x00,$inp # load next input block ++ vncipherlast $out2,$out2,$in1 ++ lvx_u $in1,$x10,$inp ++ vncipherlast $out3,$out3,$in2 ++ le?vperm 
$in0,$in0,$in0,$inpperm ++ lvx_u $in2,$x20,$inp ++ vncipherlast $out4,$out4,$in3 ++ le?vperm $in1,$in1,$in1,$inpperm ++ lvx_u $in3,$x30,$inp ++ vncipherlast $out5,$out5,$in4 ++ le?vperm $in2,$in2,$in2,$inpperm ++ lvx_u $in4,$x40,$inp ++ vncipherlast $out6,$out6,$in5 ++ le?vperm $in3,$in3,$in3,$inpperm ++ lvx_u $in5,$x50,$inp ++ vncipherlast $out7,$out7,$in6 ++ le?vperm $in4,$in4,$in4,$inpperm ++ lvx_u $in6,$x60,$inp ++ vmr $ivec,$in7 ++ le?vperm $in5,$in5,$in5,$inpperm ++ lvx_u $in7,$x70,$inp ++ addi $inp,$inp,0x80 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $in6,$in6,$in6,$inpperm ++ vxor $out0,$in0,$rndkey0 ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $in7,$in7,$in7,$inpperm ++ vxor $out1,$in1,$rndkey0 ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ vxor $out2,$in2,$rndkey0 ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ vxor $out3,$in3,$rndkey0 ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ vxor $out4,$in4,$rndkey0 ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x50,$out ++ vxor $out5,$in5,$rndkey0 ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x60,$out ++ vxor $out6,$in6,$rndkey0 ++ stvx_u $out7,$x70,$out ++ addi $out,$out,0x80 ++ vxor $out7,$in7,$rndkey0 ++ ++ mtctr $rounds ++ beq Loop_cbc_dec8x # did $len-=128 borrow? ++ ++ addic. $len,$len,128 ++ beq Lcbc_dec8x_done ++ nop ++ nop ++ ++Loop_cbc_dec8x_tail: # up to 7 "words" tail... ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ lvx v24,$x20,$key_ # round[3] ++ addi $key_,$key_,0x20 ++ ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ lvx v25,$x10,$key_ # round[4] ++ bdnz Loop_cbc_dec8x_tail ++ ++ vncipher $out1,$out1,v24 ++ vncipher $out2,$out2,v24 ++ vncipher $out3,$out3,v24 ++ vncipher $out4,$out4,v24 ++ vncipher $out5,$out5,v24 ++ vncipher $out6,$out6,v24 ++ vncipher $out7,$out7,v24 ++ ++ vncipher $out1,$out1,v25 ++ vncipher $out2,$out2,v25 ++ vncipher $out3,$out3,v25 ++ vncipher $out4,$out4,v25 ++ vncipher $out5,$out5,v25 ++ vncipher $out6,$out6,v25 ++ vncipher $out7,$out7,v25 ++ ++ vncipher $out1,$out1,v26 ++ vncipher $out2,$out2,v26 ++ vncipher $out3,$out3,v26 ++ vncipher $out4,$out4,v26 ++ vncipher $out5,$out5,v26 ++ vncipher $out6,$out6,v26 ++ vncipher $out7,$out7,v26 ++ ++ vncipher $out1,$out1,v27 ++ vncipher $out2,$out2,v27 ++ vncipher $out3,$out3,v27 ++ vncipher $out4,$out4,v27 ++ vncipher $out5,$out5,v27 ++ vncipher $out6,$out6,v27 ++ vncipher $out7,$out7,v27 ++ ++ vncipher $out1,$out1,v28 ++ vncipher $out2,$out2,v28 ++ vncipher $out3,$out3,v28 ++ vncipher $out4,$out4,v28 ++ vncipher $out5,$out5,v28 ++ vncipher $out6,$out6,v28 ++ vncipher $out7,$out7,v28 ++ ++ vncipher $out1,$out1,v29 ++ vncipher $out2,$out2,v29 ++ vncipher $out3,$out3,v29 ++ vncipher $out4,$out4,v29 ++ vncipher $out5,$out5,v29 ++ vncipher $out6,$out6,v29 ++ vncipher $out7,$out7,v29 ++ ++ vncipher $out1,$out1,v30 ++ vxor $ivec,$ivec,v31 # last round key ++ vncipher $out2,$out2,v30 ++ vxor $in1,$in1,v31 ++ vncipher $out3,$out3,v30 ++ vxor $in2,$in2,v31 ++ vncipher $out4,$out4,v30 ++ vxor $in3,$in3,v31 ++ vncipher $out5,$out5,v30 ++ vxor $in4,$in4,v31 ++ vncipher $out6,$out6,v30 
++ vxor $in5,$in5,v31 ++ vncipher $out7,$out7,v30 ++ vxor $in6,$in6,v31 ++ ++ cmplwi $len,32 # switch($len) ++ blt Lcbc_dec8x_one ++ nop ++ beq Lcbc_dec8x_two ++ cmplwi $len,64 ++ blt Lcbc_dec8x_three ++ nop ++ beq Lcbc_dec8x_four ++ cmplwi $len,96 ++ blt Lcbc_dec8x_five ++ nop ++ beq Lcbc_dec8x_six ++ ++Lcbc_dec8x_seven: ++ vncipherlast $out1,$out1,$ivec ++ vncipherlast $out2,$out2,$in1 ++ vncipherlast $out3,$out3,$in2 ++ vncipherlast $out4,$out4,$in3 ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out1,$out1,$out1,$inpperm ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x00,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x10,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x20,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x30,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x40,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x50,$out ++ stvx_u $out7,$x60,$out ++ addi $out,$out,0x70 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_six: ++ vncipherlast $out2,$out2,$ivec ++ vncipherlast $out3,$out3,$in2 ++ vncipherlast $out4,$out4,$in3 ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out2,$out2,$out2,$inpperm ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x00,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x10,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x20,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x30,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x40,$out ++ stvx_u $out7,$x50,$out ++ addi $out,$out,0x60 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_five: ++ vncipherlast $out3,$out3,$ivec ++ vncipherlast $out4,$out4,$in3 ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out3,$out3,$out3,$inpperm ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x00,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x10,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x20,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x30,$out ++ stvx_u $out7,$x40,$out ++ addi $out,$out,0x50 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_four: ++ vncipherlast $out4,$out4,$ivec ++ vncipherlast $out5,$out5,$in4 ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out4,$out4,$out4,$inpperm ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x00,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x10,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x20,$out ++ stvx_u $out7,$x30,$out ++ addi $out,$out,0x40 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_three: ++ vncipherlast $out5,$out5,$ivec ++ vncipherlast $out6,$out6,$in5 ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out5,$out5,$out5,$inpperm ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x00,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x10,$out ++ stvx_u $out7,$x20,$out ++ addi $out,$out,0x30 ++ b Lcbc_dec8x_done ++ ++.align 5 ++Lcbc_dec8x_two: ++ vncipherlast $out6,$out6,$ivec ++ vncipherlast $out7,$out7,$in6 ++ vmr $ivec,$in7 ++ ++ le?vperm $out6,$out6,$out6,$inpperm ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x00,$out ++ stvx_u $out7,$x10,$out ++ addi $out,$out,0x20 ++ b Lcbc_dec8x_done ++ ++.align 5 
++Lcbc_dec8x_one: ++ vncipherlast $out7,$out7,$ivec ++ vmr $ivec,$in7 ++ ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out7,0,$out ++ addi $out,$out,0x10 ++ ++Lcbc_dec8x_done: ++ le?vperm $ivec,$ivec,$ivec,$inpperm ++ stvx_u $ivec,0,$ivp # write [unaligned] iv ++ ++ li r10,`$FRAME+15` ++ li r11,`$FRAME+31` ++ stvx $inpperm,r10,$sp # wipe copies of round keys ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ ++ mtspr 256,$vrsave ++ lvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0x80,6,6,0 ++ .long 0 ++.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt ++___ ++}} }}} ++ ++######################################################################### ++{{{ # CTR procedure[s] # ++my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); ++my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); ++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= ++ map("v$_",(4..11)); ++my $dat=$tmp; ++ ++$code.=<<___; ++.globl .${prefix}_ctr32_encrypt_blocks ++.align 5 ++.${prefix}_ctr32_encrypt_blocks: ++ ${UCMP}i $len,1 ++ bltlr- ++ ++ lis r0,0xfff0 ++ mfspr $vrsave,256 ++ mtspr 256,r0 ++ ++ li $idx,15 ++ vxor $rndkey0,$rndkey0,$rndkey0 ++ le?vspltisb $tmp,0x0f ++ ++ lvx $ivec,0,$ivp # load [unaligned] iv ++ lvsl $inpperm,0,$ivp ++ lvx $inptail,$idx,$ivp ++ vspltisb $one,1 ++ le?vxor $inpperm,$inpperm,$tmp ++ vperm $ivec,$ivec,$inptail,$inpperm ++ vsldoi $one,$rndkey0,$one,1 ++ ++ neg r11,$inp ++ ?lvsl $keyperm,0,$key # prepare for unaligned key ++ lwz $rounds,240($key) ++ ++ lvsr $inpperm,0,r11 # prepare for unaligned load ++ lvx $inptail,0,$inp ++ addi $inp,$inp,15 # 15 is not typo ++ le?vxor $inpperm,$inpperm,$tmp ++ ++ srwi $rounds,$rounds,1 ++ li $idx,16 ++ subi $rounds,$rounds,1 ++ ++ ${UCMP}i $len,8 ++ bge _aesp8_ctr32_encrypt8x ++ ++ ?lvsr $outperm,0,$out # prepare for unaligned store ++ vspltisb $outmask,-1 ++ lvx $outhead,0,$out ++ ?vperm $outmask,$rndkey0,$outmask,$outperm ++ le?vxor $outperm,$outperm,$tmp ++ ++ lvx $rndkey0,0,$key ++ mtctr $rounds ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vxor $inout,$ivec,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ b Loop_ctr32_enc ++ ++.align 5 ++Loop_ctr32_enc: ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vcipher $inout,$inout,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ bdnz Loop_ctr32_enc ++ ++ vadduwm $ivec,$ivec,$one ++ vmr $dat,$inptail ++ 
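++ # ($dat and $inptail form the usual AltiVec rolling pair for a
++ # possibly unaligned input stream: keep the previous 16-byte load,
++ # fetch the next one, and let vperm splice the actual block out of
++ # the two.)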
lvx $inptail,0,$inp ++ addi $inp,$inp,16 ++ subic. $len,$len,1 # blocks-- ++ ++ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm ++ vcipher $inout,$inout,$rndkey1 ++ lvx $rndkey1,$idx,$key ++ vperm $dat,$dat,$inptail,$inpperm ++ li $idx,16 ++ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm ++ lvx $rndkey0,0,$key ++ vxor $dat,$dat,$rndkey1 # last round key ++ vcipherlast $inout,$inout,$dat ++ ++ lvx $rndkey1,$idx,$key ++ addi $idx,$idx,16 ++ vperm $inout,$inout,$inout,$outperm ++ vsel $dat,$outhead,$inout,$outmask ++ mtctr $rounds ++ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm ++ vmr $outhead,$inout ++ vxor $inout,$ivec,$rndkey0 ++ lvx $rndkey0,$idx,$key ++ addi $idx,$idx,16 ++ stvx $dat,0,$out ++ addi $out,$out,16 ++ bne Loop_ctr32_enc ++ ++ addi $out,$out,-1 ++ lvx $inout,0,$out # redundant in aligned case ++ vsel $inout,$outhead,$inout,$outmask ++ stvx $inout,0,$out ++ ++ mtspr 256,$vrsave ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,6,0 ++ .long 0 ++___ ++######################################################################### ++{{ # Optimized CTR procedure # ++my $key_="r11"; ++my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); ++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); ++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); ++my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys ++ # v26-v31 last 6 round keys ++my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment ++my ($two,$three,$four)=($outhead,$outperm,$outmask); ++ ++$code.=<<___; ++.align 5 ++_aesp8_ctr32_encrypt8x: ++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) ++ li r10,`$FRAME+8*16+15` ++ li r11,`$FRAME+8*16+31` ++ stvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ li r0,-1 ++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave ++ li $x10,0x10 ++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ li $x20,0x20 ++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ li $x30,0x30 ++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ li $x40,0x40 ++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ li $x50,0x50 ++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ li $x60,0x60 ++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ li $x70,0x70 ++ mtspr 256,r0 ++ ++ subi $rounds,$rounds,3 # -4 in total ++ ++ lvx $rndkey0,$x00,$key # load key schedule ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ lvx v31,$x00,$key ++ ?vperm $rndkey0,$rndkey0,v30,$keyperm ++ addi $key_,$sp,$FRAME+15 ++ mtctr $rounds ++ ++Load_ctr32_enc_key: ++ ?vperm v24,v30,v31,$keyperm ++ lvx v30,$x10,$key ++ addi $key,$key,0x20 ++ stvx v24,$x00,$key_ # off-load round[1] ++ ?vperm v25,v31,v30,$keyperm ++ lvx v31,$x00,$key ++ stvx v25,$x10,$key_ # off-load round[2] ++ addi $key_,$key_,0x20 ++ bdnz Load_ctr32_enc_key ++ ++ lvx v26,$x10,$key ++ ?vperm v24,v30,v31,$keyperm ++ lvx v27,$x20,$key ++ stvx v24,$x00,$key_ # off-load round[3] ++ ?vperm v25,v31,v26,$keyperm ++ lvx v28,$x30,$key ++ stvx v25,$x10,$key_ # off-load round[4] ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ ?vperm v26,v26,v27,$keyperm ++ lvx v29,$x40,$key ++ ?vperm v27,v27,v28,$keyperm ++ lvx v30,$x50,$key ++ ?vperm v28,v28,v29,$keyperm ++ lvx v31,$x60,$key ++ ?vperm 
v29,v29,v30,$keyperm ++ lvx $out0,$x70,$key # borrow $out0 ++ ?vperm v30,v30,v31,$keyperm ++ lvx v24,$x00,$key_ # pre-load round[1] ++ ?vperm v31,v31,$out0,$keyperm ++ lvx v25,$x10,$key_ # pre-load round[2] ++ ++ vadduwm $two,$one,$one ++ subi $inp,$inp,15 # undo "caller" ++ $SHL $len,$len,4 ++ ++ vadduwm $out1,$ivec,$one # counter values ... ++ vadduwm $out2,$ivec,$two ++ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] ++ le?li $idx,8 ++ vadduwm $out3,$out1,$two ++ vxor $out1,$out1,$rndkey0 ++ le?lvsl $inpperm,0,$idx ++ vadduwm $out4,$out2,$two ++ vxor $out2,$out2,$rndkey0 ++ le?vspltisb $tmp,0x0f ++ vadduwm $out5,$out3,$two ++ vxor $out3,$out3,$rndkey0 ++ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u ++ vadduwm $out6,$out4,$two ++ vxor $out4,$out4,$rndkey0 ++ vadduwm $out7,$out5,$two ++ vxor $out5,$out5,$rndkey0 ++ vadduwm $ivec,$out6,$two # next counter value ++ vxor $out6,$out6,$rndkey0 ++ vxor $out7,$out7,$rndkey0 ++ ++ mtctr $rounds ++ b Loop_ctr32_enc8x ++.align 5 ++Loop_ctr32_enc8x: ++ vcipher $out0,$out0,v24 ++ vcipher $out1,$out1,v24 ++ vcipher $out2,$out2,v24 ++ vcipher $out3,$out3,v24 ++ vcipher $out4,$out4,v24 ++ vcipher $out5,$out5,v24 ++ vcipher $out6,$out6,v24 ++ vcipher $out7,$out7,v24 ++Loop_ctr32_enc8x_middle: ++ lvx v24,$x20,$key_ # round[3] ++ addi $key_,$key_,0x20 ++ ++ vcipher $out0,$out0,v25 ++ vcipher $out1,$out1,v25 ++ vcipher $out2,$out2,v25 ++ vcipher $out3,$out3,v25 ++ vcipher $out4,$out4,v25 ++ vcipher $out5,$out5,v25 ++ vcipher $out6,$out6,v25 ++ vcipher $out7,$out7,v25 ++ lvx v25,$x10,$key_ # round[4] ++ bdnz Loop_ctr32_enc8x ++ ++ subic r11,$len,256 # $len-256, borrow $key_ ++ vcipher $out0,$out0,v24 ++ vcipher $out1,$out1,v24 ++ vcipher $out2,$out2,v24 ++ vcipher $out3,$out3,v24 ++ vcipher $out4,$out4,v24 ++ vcipher $out5,$out5,v24 ++ vcipher $out6,$out6,v24 ++ vcipher $out7,$out7,v24 ++ ++ subfe r0,r0,r0 # borrow?-1:0 ++ vcipher $out0,$out0,v25 ++ vcipher $out1,$out1,v25 ++ vcipher $out2,$out2,v25 ++ vcipher $out3,$out3,v25 ++ vcipher $out4,$out4,v25 ++ vcipher $out5,$out5,v25 ++ vcipher $out6,$out6,v25 ++ vcipher $out7,$out7,v25 ++ ++ and r0,r0,r11 ++ addi $key_,$sp,$FRAME+15 # rewind $key_ ++ vcipher $out0,$out0,v26 ++ vcipher $out1,$out1,v26 ++ vcipher $out2,$out2,v26 ++ vcipher $out3,$out3,v26 ++ vcipher $out4,$out4,v26 ++ vcipher $out5,$out5,v26 ++ vcipher $out6,$out6,v26 ++ vcipher $out7,$out7,v26 ++ lvx v24,$x00,$key_ # re-pre-load round[1] ++ ++ subic $len,$len,129 # $len-=129 ++ vcipher $out0,$out0,v27 ++ addi $len,$len,1 # $len-=128 really ++ vcipher $out1,$out1,v27 ++ vcipher $out2,$out2,v27 ++ vcipher $out3,$out3,v27 ++ vcipher $out4,$out4,v27 ++ vcipher $out5,$out5,v27 ++ vcipher $out6,$out6,v27 ++ vcipher $out7,$out7,v27 ++ lvx v25,$x10,$key_ # re-pre-load round[2] ++ ++ vcipher $out0,$out0,v28 ++ lvx_u $in0,$x00,$inp # load input ++ vcipher $out1,$out1,v28 ++ lvx_u $in1,$x10,$inp ++ vcipher $out2,$out2,v28 ++ lvx_u $in2,$x20,$inp ++ vcipher $out3,$out3,v28 ++ lvx_u $in3,$x30,$inp ++ vcipher $out4,$out4,v28 ++ lvx_u $in4,$x40,$inp ++ vcipher $out5,$out5,v28 ++ lvx_u $in5,$x50,$inp ++ vcipher $out6,$out6,v28 ++ lvx_u $in6,$x60,$inp ++ vcipher $out7,$out7,v28 ++ lvx_u $in7,$x70,$inp ++ addi $inp,$inp,0x80 ++ ++ vcipher $out0,$out0,v29 ++ le?vperm $in0,$in0,$in0,$inpperm ++ vcipher $out1,$out1,v29 ++ le?vperm $in1,$in1,$in1,$inpperm ++ vcipher $out2,$out2,v29 ++ le?vperm $in2,$in2,$in2,$inpperm ++ vcipher $out3,$out3,v29 ++ le?vperm $in3,$in3,$in3,$inpperm ++ vcipher $out4,$out4,v29 ++ le?vperm $in4,$in4,$in4,$inpperm ++ 
vcipher $out5,$out5,v29 ++ le?vperm $in5,$in5,$in5,$inpperm ++ vcipher $out6,$out6,v29 ++ le?vperm $in6,$in6,$in6,$inpperm ++ vcipher $out7,$out7,v29 ++ le?vperm $in7,$in7,$in7,$inpperm ++ ++ add $inp,$inp,r0 # $inp is adjusted in such ++ # way that at exit from the ++ # loop inX-in7 are loaded ++ # with last "words" ++ subfe. r0,r0,r0 # borrow?-1:0 ++ vcipher $out0,$out0,v30 ++ vxor $in0,$in0,v31 # xor with last round key ++ vcipher $out1,$out1,v30 ++ vxor $in1,$in1,v31 ++ vcipher $out2,$out2,v30 ++ vxor $in2,$in2,v31 ++ vcipher $out3,$out3,v30 ++ vxor $in3,$in3,v31 ++ vcipher $out4,$out4,v30 ++ vxor $in4,$in4,v31 ++ vcipher $out5,$out5,v30 ++ vxor $in5,$in5,v31 ++ vcipher $out6,$out6,v30 ++ vxor $in6,$in6,v31 ++ vcipher $out7,$out7,v30 ++ vxor $in7,$in7,v31 ++ ++ bne Lctr32_enc8x_break # did $len-129 borrow? ++ ++ vcipherlast $in0,$out0,$in0 ++ vcipherlast $in1,$out1,$in1 ++ vadduwm $out1,$ivec,$one # counter values ... ++ vcipherlast $in2,$out2,$in2 ++ vadduwm $out2,$ivec,$two ++ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] ++ vcipherlast $in3,$out3,$in3 ++ vadduwm $out3,$out1,$two ++ vxor $out1,$out1,$rndkey0 ++ vcipherlast $in4,$out4,$in4 ++ vadduwm $out4,$out2,$two ++ vxor $out2,$out2,$rndkey0 ++ vcipherlast $in5,$out5,$in5 ++ vadduwm $out5,$out3,$two ++ vxor $out3,$out3,$rndkey0 ++ vcipherlast $in6,$out6,$in6 ++ vadduwm $out6,$out4,$two ++ vxor $out4,$out4,$rndkey0 ++ vcipherlast $in7,$out7,$in7 ++ vadduwm $out7,$out5,$two ++ vxor $out5,$out5,$rndkey0 ++ le?vperm $in0,$in0,$in0,$inpperm ++ vadduwm $ivec,$out6,$two # next counter value ++ vxor $out6,$out6,$rndkey0 ++ le?vperm $in1,$in1,$in1,$inpperm ++ vxor $out7,$out7,$rndkey0 ++ mtctr $rounds ++ ++ vcipher $out0,$out0,v24 ++ stvx_u $in0,$x00,$out ++ le?vperm $in2,$in2,$in2,$inpperm ++ vcipher $out1,$out1,v24 ++ stvx_u $in1,$x10,$out ++ le?vperm $in3,$in3,$in3,$inpperm ++ vcipher $out2,$out2,v24 ++ stvx_u $in2,$x20,$out ++ le?vperm $in4,$in4,$in4,$inpperm ++ vcipher $out3,$out3,v24 ++ stvx_u $in3,$x30,$out ++ le?vperm $in5,$in5,$in5,$inpperm ++ vcipher $out4,$out4,v24 ++ stvx_u $in4,$x40,$out ++ le?vperm $in6,$in6,$in6,$inpperm ++ vcipher $out5,$out5,v24 ++ stvx_u $in5,$x50,$out ++ le?vperm $in7,$in7,$in7,$inpperm ++ vcipher $out6,$out6,v24 ++ stvx_u $in6,$x60,$out ++ vcipher $out7,$out7,v24 ++ stvx_u $in7,$x70,$out ++ addi $out,$out,0x80 ++ ++ b Loop_ctr32_enc8x_middle ++ ++.align 5 ++Lctr32_enc8x_break: ++ cmpwi $len,-0x60 ++ blt Lctr32_enc8x_one ++ nop ++ beq Lctr32_enc8x_two ++ cmpwi $len,-0x40 ++ blt Lctr32_enc8x_three ++ nop ++ beq Lctr32_enc8x_four ++ cmpwi $len,-0x20 ++ blt Lctr32_enc8x_five ++ nop ++ beq Lctr32_enc8x_six ++ cmpwi $len,0x00 ++ blt Lctr32_enc8x_seven ++ ++Lctr32_enc8x_eight: ++ vcipherlast $out0,$out0,$in0 ++ vcipherlast $out1,$out1,$in1 ++ vcipherlast $out2,$out2,$in2 ++ vcipherlast $out3,$out3,$in3 ++ vcipherlast $out4,$out4,$in4 ++ vcipherlast $out5,$out5,$in5 ++ vcipherlast $out6,$out6,$in6 ++ vcipherlast $out7,$out7,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x50,$out ++ le?vperm $out7,$out7,$out7,$inpperm ++ stvx_u $out6,$x60,$out ++ stvx_u $out7,$x70,$out ++ addi $out,$out,0x80 ++ b Lctr32_enc8x_done ++ ++.align 5 
++Lctr32_enc8x_seven: ++ vcipherlast $out0,$out0,$in1 ++ vcipherlast $out1,$out1,$in2 ++ vcipherlast $out2,$out2,$in3 ++ vcipherlast $out3,$out3,$in4 ++ vcipherlast $out4,$out4,$in5 ++ vcipherlast $out5,$out5,$in6 ++ vcipherlast $out6,$out6,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ le?vperm $out6,$out6,$out6,$inpperm ++ stvx_u $out5,$x50,$out ++ stvx_u $out6,$x60,$out ++ addi $out,$out,0x70 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_six: ++ vcipherlast $out0,$out0,$in2 ++ vcipherlast $out1,$out1,$in3 ++ vcipherlast $out2,$out2,$in4 ++ vcipherlast $out3,$out3,$in5 ++ vcipherlast $out4,$out4,$in6 ++ vcipherlast $out5,$out5,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ le?vperm $out5,$out5,$out5,$inpperm ++ stvx_u $out4,$x40,$out ++ stvx_u $out5,$x50,$out ++ addi $out,$out,0x60 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_five: ++ vcipherlast $out0,$out0,$in3 ++ vcipherlast $out1,$out1,$in4 ++ vcipherlast $out2,$out2,$in5 ++ vcipherlast $out3,$out3,$in6 ++ vcipherlast $out4,$out4,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ le?vperm $out4,$out4,$out4,$inpperm ++ stvx_u $out3,$x30,$out ++ stvx_u $out4,$x40,$out ++ addi $out,$out,0x50 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_four: ++ vcipherlast $out0,$out0,$in4 ++ vcipherlast $out1,$out1,$in5 ++ vcipherlast $out2,$out2,$in6 ++ vcipherlast $out3,$out3,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ le?vperm $out3,$out3,$out3,$inpperm ++ stvx_u $out2,$x20,$out ++ stvx_u $out3,$x30,$out ++ addi $out,$out,0x40 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_three: ++ vcipherlast $out0,$out0,$in5 ++ vcipherlast $out1,$out1,$in6 ++ vcipherlast $out2,$out2,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ le?vperm $out2,$out2,$out2,$inpperm ++ stvx_u $out1,$x10,$out ++ stvx_u $out2,$x20,$out ++ addi $out,$out,0x30 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_two: ++ vcipherlast $out0,$out0,$in6 ++ vcipherlast $out1,$out1,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ le?vperm $out1,$out1,$out1,$inpperm ++ stvx_u $out0,$x00,$out ++ stvx_u $out1,$x10,$out ++ addi $out,$out,0x20 ++ b Lctr32_enc8x_done ++ ++.align 5 ++Lctr32_enc8x_one: ++ vcipherlast $out0,$out0,$in7 ++ ++ le?vperm $out0,$out0,$out0,$inpperm ++ stvx_u $out0,0,$out ++ addi $out,$out,0x10 ++ ++Lctr32_enc8x_done: ++ li r10,`$FRAME+15` ++ li r11,`$FRAME+31` ++ stvx $inpperm,r10,$sp # wipe copies of round keys ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx
$inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ stvx $inpperm,r10,$sp ++ addi r10,r10,32 ++ stvx $inpperm,r11,$sp ++ addi r11,r11,32 ++ ++ mtspr 256,$vrsave ++ lvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0x80,6,6,0 ++ .long 0 ++.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks ++___ ++}} }}} ++ ++my $consts=1; ++foreach(split("\n",$code)) { ++ s/\`([^\`]*)\`/eval($1)/geo; ++ ++ # constants table endian-specific conversion ++ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { ++ my $conv=$3; ++ my @bytes=(); ++ ++ # convert to endian-agnostic format ++ if ($1 eq "long") { ++ foreach (split(/,\s*/,$2)) { ++ my $l = /^0/?oct:int; ++ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; ++ } ++ } else { ++ @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); ++ } ++ ++ # little-endian conversion ++ if ($flavour =~ /le$/o) { ++ SWITCH: for($conv) { ++ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; ++ /\?rev/ && do { @bytes=reverse(@bytes); last; }; ++ } ++ } ++ ++ #emit ++ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; ++ next; ++ } ++ $consts=0 if (m/Lconsts:/o); # end of table ++ ++ # instructions prefixed with '?' are endian-specific and need ++ # to be adjusted accordingly... ++ if ($flavour =~ /le$/o) { # little-endian ++ s/le\?//o or ++ s/be\?/#be#/o or ++ s/\?lvsr/lvsl/o or ++ s/\?lvsl/lvsr/o or ++ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or ++ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or ++ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; ++ } else { # big-endian ++ s/le\?/#le#/o or ++ s/be\?//o or ++ s/\?([a-z]+)/$1/o; ++ } ++ ++ print $_,"\n"; ++} ++ ++close STDOUT; +diff --git a/crypto/aes/asm/vpaes-ppc.pl b/crypto/aes/asm/vpaes-ppc.pl +new file mode 100644 +index 0000000..7fda60e +--- /dev/null ++++ b/crypto/aes/asm/vpaes-ppc.pl +@@ -0,0 +1,1512 @@ ++#!/usr/bin/env perl ++ ++###################################################################### ++## Constant-time SSSE3 AES core implementation. ++## version 0.1 ++## ++## By Mike Hamburg (Stanford University), 2009 ++## Public domain. ++## ++## For details see http://shiftleft.org/papers/vector_aes/ and ++## http://crypto.stanford.edu/vpaes/. ++ ++# CBC encrypt/decrypt performance in cycles per byte processed with ++# 128-bit key. ++# ++# aes-ppc.pl this ++# G4e 35.5/52.1/(23.8) 11.9(*)/15.4 ++# POWER6 42.7/54.3/(28.2) 63.0/92.8(**) ++# POWER7 32.3/42.9/(18.4) 18.5/23.3 ++# ++# (*) This is ~10% worse than reported in paper. The reason is ++# twofold. This module doesn't make any assumption about ++# key schedule (or data for that matter) alignment and handles ++# it in-line. 
Secondly it, being transliterated from ++# vpaes-x86_64.pl, relies on "nested inversion" better suited ++# for Intel CPUs. ++# (**) Inadequate POWER6 performance is due to astronomic AltiVec ++# latency, 9 cycles per simple logical operation. ++ ++$flavour = shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T =8; ++ $LRSAVE =2*$SIZE_T; ++ $STU ="stdu"; ++ $POP ="ld"; ++ $PUSH ="std"; ++ $UCMP ="cmpld"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T =4; ++ $LRSAVE =$SIZE_T; ++ $STU ="stwu"; ++ $POP ="lwz"; ++ $PUSH ="stw"; ++ $UCMP ="cmplw"; ++} else { die "nonsense $flavour"; } ++ ++$sp="r1"; ++$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; ++ ++$code.=<<___; ++.machine "any" ++ ++.text ++ ++.align 7 # totally strategic alignment ++_vpaes_consts: ++Lk_mc_forward: # mc_forward ++ .long 0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c ?inv ++ .long 0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300 ?inv ++ .long 0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704 ?inv ++ .long 0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08 ?inv ++Lk_mc_backward: # mc_backward ++ .long 0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e ?inv ++ .long 0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a ?inv ++ .long 0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506 ?inv ++ .long 0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102 ?inv ++Lk_sr: # sr ++ .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f ?inv ++ .long 0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b ?inv ++ .long 0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07 ?inv ++ .long 0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603 ?inv ++ ++## ++## "Hot" constants ++## ++Lk_inv: # inv, inva ++ .long 0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704 ?rev ++ .long 0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03 ?rev ++Lk_ipt: # input transform (lo, hi) ++ .long 0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca ?rev ++ .long 0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd ?rev ++Lk_sbo: # sbou, sbot ++ .long 0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15 ?rev ++ .long 0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e ?rev ++Lk_sb1: # sb1u, sb1t ++ .long 0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b ?rev ++ .long 0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5 ?rev ++Lk_sb2: # sb2u, sb2t ++ .long 0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2 ?rev ++ .long 0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e ?rev ++ ++## ++## Decryption stuff ++## ++Lk_dipt: # decryption input transform ++ .long 0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15 ?rev ++ .long 0x00650560, 0xe683e386, 0x94f191f4, 0x72177712 ?rev ++Lk_dsbo: # decryption sbox final output ++ .long 0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7 ?rev ++ .long 0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca ?rev ++Lk_dsb9: # decryption sbox output *9*u, *9*t ++ .long 0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca ?rev ++ .long 0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72 ?rev ++Lk_dsbd: # decryption sbox output *D*u, *D*t ++ .long 0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5 ?rev ++ .long 0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129 ?rev ++Lk_dsbb: # decryption sbox output *B*u, *B*t ++ .long 0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660 ?rev ++ .long 0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3 ?rev ++Lk_dsbe: # decryption sbox output *E*u, *E*t ++ .long 0x00d0d426, 
0x9692f246, 0xb0f6b464, 0x04604222 ?rev ++ .long 0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794 ?rev ++ ++## ++## Key schedule constants ++## ++Lk_dksd: # decryption key schedule: invskew x*D ++ .long 0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007 ?rev ++ .long 0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f ?rev ++Lk_dksb: # decryption key schedule: invskew x*B ++ .long 0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603 ?rev ++ .long 0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9 ?rev ++Lk_dkse: # decryption key schedule: invskew x*E + 0x63 ++ .long 0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553 ?rev ++ .long 0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd ?rev ++Lk_dks9: # decryption key schedule: invskew x*9 ++ .long 0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a ?rev ++ .long 0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b ?rev ++ ++Lk_rcon: # rcon ++ .long 0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70 ?asis ++Lk_s63: ++ .long 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b ?asis ++ ++Lk_opt: # output transform ++ .long 0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7 ?rev ++ .long 0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1 ?rev ++Lk_deskew: # deskew tables: inverts the sbox's "skew" ++ .long 0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d ?rev ++ .long 0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128 ?rev ++.align 5 ++Lconsts: ++ mflr r0 ++ bcl 20,31,\$+4 ++ mflr r12 #vvvvv "distance between . and _vpaes_consts ++ addi r12,r12,-0x308 ++ mtlr r0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.asciz "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)" ++.align 6 ++___ ++ ++my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31)); ++{ ++my ($inp,$out,$key) = map("r$_",(3..5)); ++ ++my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15)); ++my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19)); ++my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23)); ++ ++$code.=<<___; ++## ++## _aes_preheat ++## ++## Fills register %r10 -> .aes_consts (so you can -fPIC) ++## and %xmm9-%xmm15 as specified below. ++## ++.align 4 ++_vpaes_encrypt_preheat: ++ mflr r8 ++ bl Lconsts ++ mtlr r8 ++ li r11, 0xc0 # Lk_inv ++ li r10, 0xd0 ++ li r9, 0xe0 # Lk_ipt ++ li r8, 0xf0 ++ vxor v7, v7, v7 # 0x00..00 ++ vspltisb v8,4 # 0x04..04 ++ vspltisb v9,0x0f # 0x0f..0f ++ lvx $invlo, r12, r11 ++ li r11, 0x100 ++ lvx $invhi, r12, r10 ++ li r10, 0x110 ++ lvx $iptlo, r12, r9 ++ li r9, 0x120 ++ lvx $ipthi, r12, r8 ++ li r8, 0x130 ++ lvx $sbou, r12, r11 ++ li r11, 0x140 ++ lvx $sbot, r12, r10 ++ li r10, 0x150 ++ lvx $sb1u, r12, r9 ++ lvx $sb1t, r12, r8 ++ lvx $sb2u, r12, r11 ++ lvx $sb2t, r12, r10 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## _aes_encrypt_core ++## ++## AES-encrypt %xmm0. 
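++## The S-box and transform tables were pre-loaded into v10-v19 by
++## _vpaes_encrypt_preheat, so the rounds below never index memory by
++## secret data; this is what keeps the implementation constant-time.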
++## ++## Inputs: ++## %xmm0 = input ++## %xmm9-%xmm15 as in _vpaes_preheat ++## (%rdx) = scheduled keys ++## ++## Output in %xmm0 ++## Clobbers %xmm1-%xmm6, %r9, %r10, %r11, %rax ++## ++## ++.align 5 ++_vpaes_encrypt_core: ++ lwz r8, 240($key) # pull rounds ++ li r9, 16 ++ lvx v5, 0, $key # vmovdqu (%r9), %xmm5 # round0 key ++ li r11, 0x10 ++ lvx v6, r9, $key ++ addi r9, r9, 16 ++ ?vperm v5, v5, v6, $keyperm # align round key ++ addi r10, r11, 0x40 ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 ++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm1 ++ vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm3, %xmm2 ++ vxor v0, v0, v5 # vpxor %xmm5, %xmm1, %xmm0 ++ vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0 ++ mtctr r8 ++ b Lenc_entry ++ ++.align 4 ++Lenc_loop: ++ # middle of middle round ++ vperm v4, $sb1t, v7, v2 # vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u ++ lvx v1, r12, r11 # vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[] ++ addi r11, r11, 16 ++ vperm v0, $sb1u, v7, v3 # vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t ++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ andi. r11, r11, 0x30 # and \$0x30, %r11 # ... mod 4 ++ vperm v5, $sb2t, v7, v2 # vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u ++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ vperm v2, $sb2u, v7, v3 # vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t ++ lvx v4, r12, r10 # vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[] ++ addi r10, r11, 0x40 ++ vperm v3, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm3 # 0 = B ++ vxor v2, v2, v5 # vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A ++ vperm v0, v0, v7, v4 # vpshufb %xmm4, %xmm0, %xmm0 # 3 = D ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B ++ vperm v4, v3, v7, v1 # vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C ++ vxor v0, v0, v3 # vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D ++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D ++ ++Lenc_entry: ++ # top of round ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ vperm v5, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k ++ vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vand v0, v0, v9 ++ vxor v3, v3, v5 # vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ vmr v5, v6 ++ lvx v6, r9, $key # vmovdqu (%r9), %xmm5 ++ vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ addi r9, r9, 16 ++ vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ ?vperm v5, v5, v6, $keyperm # align round key ++ vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ bdnz Lenc_loop ++ ++ # middle of last round ++ addi r10, r11, 0x80 ++ # vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo ++ # vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 ++ vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ lvx v1, r12, r10 # vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[] ++ vperm v0, $sbot, v7, v3 # vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t ++ vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ vperm v0, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.globl .vpaes_encrypt ++.align 5 ++.vpaes_encrypt: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r6 ++ mfspr r7, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi 
r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r7,`$FRAME-4`($sp) # save vrsave ++ li r0, -1 ++ $PUSH r6,`$FRAME+$LRSAVE`($sp) ++ mtspr 256, r0 # preserve all AltiVec registers ++ ++ bl _vpaes_encrypt_preheat ++ ++ ?lvsl $inpperm, 0, $inp # prepare for unaligned access ++ lvx v0, 0, $inp ++ addi $inp, $inp, 15 # 15 is not a typo ++ ?lvsr $outperm, 0, $out ++ ?lvsl $keyperm, 0, $key # prepare for unaligned access ++ vnor $outmask, v7, v7 # 0xff..ff ++ lvx $inptail, 0, $inp # redundant in aligned case ++ ?vperm $outmask, v7, $outmask, $outperm ++ lvx $outhead, 0, $out ++ ?vperm v0, v0, $inptail, $inpperm ++ ++ bl _vpaes_encrypt_core ++ ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 15 # 15 is not a typo ++ ######## ++ ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtlr r6 ++ mtspr 256, r7 # restore vrsave ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_encrypt,.-.vpaes_encrypt ++ ++.align 4 ++_vpaes_decrypt_preheat: ++ mflr r8 ++ bl Lconsts ++ mtlr r8 ++ li r11, 0xc0 # Lk_inv ++ li r10, 0xd0 ++ li r9, 0x160 # Ldipt ++ li r8, 0x170 ++ vxor v7, v7, v7 # 0x00..00 ++ vspltisb v8,4 # 0x04..04 ++ vspltisb v9,0x0f # 0x0f..0f ++ lvx $invlo, r12, r11 ++ li r11, 0x180 ++ lvx $invhi, r12, r10 ++ li r10, 0x190 ++ lvx $iptlo, r12, r9 ++ li r9, 0x1a0 ++ lvx $ipthi, r12, r8 ++ li r8, 0x1b0 ++ lvx $sbou, r12, r11 ++ li r11, 0x1c0 ++ lvx $sbot, r12, r10 ++ li r10, 0x1d0 ++ lvx $sb9u, r12, r9 ++ li r9, 0x1e0 ++ lvx $sb9t, r12, r8 ++ li r8, 0x1f0 ++ lvx $sbdu, r12, r11 ++ li r11, 0x200 ++ lvx $sbdt, r12, r10 ++ li r10, 0x210 ++ lvx $sbbu, r12, r9 ++ lvx $sbbt, r12, r8 ++ lvx $sbeu, r12, r11 ++ lvx $sbet, r12, r10 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## Decryption core ++## ++## Same API as encryption core. 
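++## Each pass through Ldec_loop folds the inverse-S-box output through
++## the four table pairs Lk_dsb9/dsbd/dsbb/dsbe (the "inverse mixcolumns"
++## circulant), with an Lk_mc_forward shuffle of the accumulator between
++## pairs.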
++## ++.align 4 ++_vpaes_decrypt_core: ++ lwz r8, 240($key) # pull rounds ++ li r9, 16 ++ lvx v5, 0, $key # vmovdqu (%r9), %xmm4 # round0 key ++ li r11, 0x30 ++ lvx v6, r9, $key ++ addi r9, r9, 16 ++ ?vperm v5, v5, v6, $keyperm # align round key ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 ++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2 ++ vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm1, %xmm0 ++ vxor v0, v0, v5 # vpxor %xmm4, %xmm2, %xmm2 ++ vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0 ++ mtctr r8 ++ b Ldec_entry ++ ++.align 4 ++Ldec_loop: ++# ++# Inverse mix columns ++# ++ lvx v0, r12, r11 # v5 and v0 are flipped ++ # vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u ++ # vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t ++ vperm v4, $sb9u, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u ++ subi r11, r11, 16 ++ vperm v1, $sb9t, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t ++ andi. r11, r11, 0x30 ++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 ++ # vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu ++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ # vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt ++ ++ vperm v4, $sbdu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu ++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ vperm v1, $sbdt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt ++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ # vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu ++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ # vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt ++ ++ vperm v4, $sbbu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu ++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ vperm v1, $sbbt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt ++ vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ # vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu ++ vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ # vmovdqa 0x50(%r10), %xmm1 # 0 : sbet ++ ++ vperm v4, $sbeu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu ++ vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ vperm v1, $sbet, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet ++ vxor v0, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ vxor v0, v0, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ ++Ldec_entry: ++ # top of round ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ vperm v2, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vand v0, v0, v9 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ vmr v5, v6 ++ lvx v6, r9, $key # vmovdqu (%r9), %xmm0 ++ vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ addi r9, r9, 16 ++ vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ ?vperm v5, v5, v6, $keyperm # align round key ++ vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ bdnz Ldec_loop ++ ++ # middle of last round ++ addi r10, r11, 0x80 ++ # vmovdqa 0x60(%r10), %xmm4 # 3 : sbou ++ vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ # vmovdqa 0x70(%r10), %xmm1 # 0 : sbot ++ lvx v2, r12, r10 # vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160 ++ vperm v1, $sbot, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t ++ vxor v4, v4, v5 # vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k ++ vxor v0, v1, v4 # vpxor 
%xmm4, %xmm1, %xmm0 # 0 = A ++ vperm v0, v0, v7, v2 # vpshufb %xmm2, %xmm0, %xmm0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.globl .vpaes_decrypt ++.align 5 ++.vpaes_decrypt: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r6 ++ mfspr r7, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r7,`$FRAME-4`($sp) # save vrsave ++ li r0, -1 ++ $PUSH r6,`$FRAME+$LRSAVE`($sp) ++ mtspr 256, r0 # preserve all AltiVec registers ++ ++ bl _vpaes_decrypt_preheat ++ ++ ?lvsl $inpperm, 0, $inp # prepare for unaligned access ++ lvx v0, 0, $inp ++ addi $inp, $inp, 15 # 15 is not a typo ++ ?lvsr $outperm, 0, $out ++ ?lvsl $keyperm, 0, $key ++ vnor $outmask, v7, v7 # 0xff..ff ++ lvx $inptail, 0, $inp # redundant in aligned case ++ ?vperm $outmask, v7, $outmask, $outperm ++ lvx $outhead, 0, $out ++ ?vperm v0, v0, $inptail, $inpperm ++ ++ bl _vpaes_decrypt_core ++ ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 15 # 15 is not a typo ++ ######## ++ ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtlr r6 ++ mtspr 256, r7 # restore vrsave ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_decrypt,.-.vpaes_decrypt ++ ++.globl .vpaes_cbc_encrypt ++.align 5 ++.vpaes_cbc_encrypt: ++ ${UCMP}i r5,16 ++ bltlr- ++ ++ $STU $sp,-`($FRAME+2*$SIZE_T)`($sp) ++ mflr r0 ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mfspr r12, 256 ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r12,`$FRAME-4`($sp) # save vrsave ++ $PUSH r30,`$FRAME+$SIZE_T*0`($sp) ++ $PUSH r31,`$FRAME+$SIZE_T*1`($sp) ++ li r9, -16 ++ $PUSH r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp) ++ ++ and r30, r5, r9 # copy length&-16 ++ mr r5, r6 # copy pointer to key ++ mr r31, r7 # copy pointer to iv ++ blt Lcbc_abort ++ cmpwi r8, 0 # test direction ++ li r6, -1 ++ mr r7, r12 # copy vrsave ++ mtspr 256, r6 # preserve all AltiVec registers ++ ++ lvx v24, 0, r31 # load [potentially unaligned] iv ++ li r9, 15 ++ ?lvsl $inpperm, 0, r31 ++ lvx v25, r9, r31 ++ ?vperm v24, v24, v25, $inpperm ++ ++ neg r8, $inp # prepare for unaligned access ++ vxor v7, v7, v7 ++ ?lvsl $keyperm, 0, $key ++ ?lvsr 
$outperm, 0, $out ++ ?lvsr $inpperm, 0, r8 # -$inp ++ vnor $outmask, v7, v7 # 0xff..ff ++ lvx $inptail, 0, $inp ++ ?vperm $outmask, v7, $outmask, $outperm ++ addi $inp, $inp, 15 # 15 is not a typo ++ lvx $outhead, 0, $out ++ ++ beq Lcbc_decrypt ++ ++ bl _vpaes_encrypt_preheat ++ li r0, 16 ++ ++Lcbc_enc_loop: ++ vmr v0, $inptail ++ lvx $inptail, 0, $inp ++ addi $inp, $inp, 16 ++ ?vperm v0, v0, $inptail, $inpperm ++ vxor v0, v0, v24 # ^= iv ++ ++ bl _vpaes_encrypt_core ++ ++ vmr v24, v0 # put aside iv ++ sub. r30, r30, r0 # len -= 16 ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 16 ++ bne Lcbc_enc_loop ++ ++ b Lcbc_done ++ ++.align 5 ++Lcbc_decrypt: ++ bl _vpaes_decrypt_preheat ++ li r0, 16 ++ ++Lcbc_dec_loop: ++ vmr v0, $inptail ++ lvx $inptail, 0, $inp ++ addi $inp, $inp, 16 ++ ?vperm v0, v0, $inptail, $inpperm ++ vmr v25, v0 # put aside input ++ ++ bl _vpaes_decrypt_core ++ ++ vxor v0, v0, v24 # ^= iv ++ vmr v24, v25 ++ sub. r30, r30, r0 # len -= 16 ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v1, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v1, 0, $out ++ addi $out, $out, 16 ++ bne Lcbc_dec_loop ++ ++Lcbc_done: ++ addi $out, $out, -1 ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++ neg r8, r31 # write [potentially unaligned] iv ++ ?lvsl $outperm, 0, r8 ++ li r6, 15 ++ vnor $outmask, v7, v7 # 0xff..ff ++ ?vperm $outmask, v7, $outmask, $outperm ++ lvx $outhead, 0, r31 ++ vperm v24, v24, v24, $outperm # rotate right/left ++ vsel v0, $outhead, v24, $outmask ++ lvx v1, r6, r31 ++ stvx v0, 0, r31 ++ vsel v1, v24, v1, $outmask ++ stvx v1, r6, r31 ++ ++ mtspr 256, r7 # restore vrsave ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++Lcbc_abort: ++ $POP r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp) ++ $POP r30,`$FRAME+$SIZE_T*0`($sp) ++ $POP r31,`$FRAME+$SIZE_T*1`($sp) ++ mtlr r0 ++ addi $sp,$sp,`$FRAME+$SIZE_T*2` ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,2,6,0 ++ .long 0 ++.size .vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt ++___ ++} ++{ ++my ($inp,$bits,$out)=map("r$_",(3..5)); ++my $dir="cr1"; ++my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24)); ++ ++$code.=<<___; ++######################################################## ++## ## ++## AES key schedule ## ++## ## ++######################################################## ++.align 4 ++_vpaes_key_preheat: ++ mflr r8 ++ bl Lconsts ++ mtlr r8 ++ li r11, 0xc0 # Lk_inv ++ li r10, 0xd0 ++ li r9, 0xe0 # L_ipt ++ li r8, 0xf0 ++ ++ vspltisb v8,4 # 0x04..04 ++ vxor v9,v9,v9 # 0x00..00 ++ lvx $invlo, r12, r11 # Lk_inv ++ li r11, 0x120 ++ lvx $invhi, r12, r10 ++ li r10, 0x130 ++ lvx $iptlo, r12, r9 # Lk_ipt ++ li r9, 0x220 ++ lvx $ipthi, r12, r8 ++ li r8, 0x230 ++ ++ lvx v14, r12, r11 # Lk_sb1 ++ li r11, 0x240 ++ lvx v15, r12, r10 ++ li r10, 0x250 ++ ++ lvx v16, r12, r9 # Lk_dksd ++ li r9, 0x260 ++ lvx v17, r12, r8 ++ li r8, 0x270 ++ lvx v18, r12, r11 # Lk_dksb ++ li r11, 0x280 ++ lvx v19, r12, r10 ++ li r10, 0x290 ++ lvx v20, r12, r9 # Lk_dkse ++ li r9, 0x2a0 ++ lvx v21, 
r12, r8 ++ li r8, 0x2b0 ++ lvx v22, r12, r11 # Lk_dks9 ++ lvx v23, r12, r10 ++ ++ lvx v24, r12, r9 # Lk_rcon ++ lvx v25, 0, r12 # Lk_mc_forward[0] ++ lvx v26, r12, r8 # Lk_s63 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.align 4 ++_vpaes_schedule_core: ++ mflr r7 ++ ++ bl _vpaes_key_preheat # load the tables ++ ++ #lvx v0, 0, $inp # vmovdqu (%rdi), %xmm0 # load key (unaligned) ++ neg r8, $inp # prepare for unaligned access ++ lvx v0, 0, $inp ++ addi $inp, $inp, 15 # 15 is not a typo ++ ?lvsr $inpperm, 0, r8 # -$inp ++ lvx v6, 0, $inp # v6 serves as inptail ++ addi $inp, $inp, 8 ++ ?vperm v0, v0, v6, $inpperm ++ ++ # input transform ++ vmr v3, v0 # vmovdqa %xmm0, %xmm3 ++ bl _vpaes_schedule_transform ++ vmr v7, v0 # vmovdqa %xmm0, %xmm7 ++ ++ bne $dir, Lschedule_am_decrypting ++ ++ # encrypting, output zeroth round key after transform ++ li r8, 0x30 # mov \$0x30,%r8d ++ addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10 ++ ++ ?lvsr $outperm, 0, $out # prepare for unaligned access ++ vnor $outmask, v9, v9 # 0xff..ff ++ lvx $outhead, 0, $out ++ ?vperm $outmask, v9, $outmask, $outperm ++ ++ #stvx v0, 0, $out # vmovdqu %xmm0, (%rdx) ++ vperm v1, v0, v0, $outperm # rotate right/left ++ vsel v2, $outhead, v1, $outmask ++ vmr $outhead, v1 ++ stvx v2, 0, $out ++ b Lschedule_go ++ ++Lschedule_am_decrypting: ++ srwi r8, $bits, 1 # shr \$1,%r8d ++ andi. r8, r8, 32 # and \$32,%r8d ++ xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32 ++ addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10 ++ # decrypting, output zeroth round key after shiftrows ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 ++ vperm v4, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ ++ neg r0, $out # prepare for unaligned access ++ ?lvsl $outperm, 0, r0 ++ addi $out, $out, 15 # 15 is not a typo ++ vnor $outmask, v9, v9 # 0xff..ff ++ lvx $outhead, 0, $out ++ ?vperm $outmask, $outmask, v9, $outperm ++ ++ #stvx v4, 0, $out # vmovdqu %xmm3, (%rdx) ++ vperm v4, v4, v4, $outperm # rotate right/left ++ vsel v2, $outhead, v4, $outmask ++ vmr $outhead, v4 ++ stvx v2, 0, $out ++ xori r8, r8, 0x30 # xor \$0x30, %r8 ++ ++Lschedule_go: ++ cmplwi $bits, 192 # cmp \$192, %esi ++ bgt Lschedule_256 ++ beq Lschedule_192 ++ # 128: fall through ++ ++## ++## .schedule_128 ++## ++## 128-bit specific part of key schedule. ++## ++## This schedule is really simple, because all its parts ++## are accomplished by the subroutines. ++## ++Lschedule_128: ++ li r0, 10 # mov \$10, %esi ++ mtctr r0 ++ ++Loop_schedule_128: ++ bl _vpaes_schedule_round ++ bdz Lschedule_mangle_last # dec %esi ++ bl _vpaes_schedule_mangle # write output ++ b Loop_schedule_128 ++ ++## ++## .aes_schedule_192 ++## ++## 192-bit specific part of key schedule. ++## ++## The main body of this schedule is the same as the 128-bit ++## schedule, but with more smearing. The long, high side is ++## stored in %xmm7 as before, and the short, low side is in ++## the high bits of %xmm6. ++## ++## This schedule is somewhat nastier, however, because each ++## round produces 192 bits of key material, or 1.5 round keys. ++## Therefore, on each cycle we do 2 rounds and produce 3 round ++## keys.
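++## (Hence the four trips through Loop_schedule_192 below: 4 cycles x 3
++## round keys yields the 12 keys that follow the zeroth one written
++## above, the last of them emitted by Lschedule_mangle_last.)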
++## ++.align 4 ++Lschedule_192: ++ li r0, 4 # mov \$4, %esi ++ lvx v0, 0, $inp ++ ?vperm v0, v6, v0, $inpperm ++ ?vsldoi v0, v3, v0, 8 # vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) ++ bl _vpaes_schedule_transform # input transform ++ ?vsldoi v6, v0, v9, 8 ++ ?vsldoi v6, v9, v6, 8 # clobber "low" side with zeros ++ mtctr r0 ++ ++Loop_schedule_192: ++ bl _vpaes_schedule_round ++ ?vsldoi v0, v6, v0, 8 # vpalignr \$8,%xmm6,%xmm0,%xmm0 ++ bl _vpaes_schedule_mangle # save key n ++ bl _vpaes_schedule_192_smear ++ bl _vpaes_schedule_mangle # save key n+1 ++ bl _vpaes_schedule_round ++ bdz Lschedule_mangle_last # dec %esi ++ bl _vpaes_schedule_mangle # save key n+2 ++ bl _vpaes_schedule_192_smear ++ b Loop_schedule_192 ++ ++## ++## .aes_schedule_256 ++## ++## 256-bit specific part of key schedule. ++## ++## The structure here is very similar to the 128-bit ++## schedule, but with an additional "low side" in ++## %xmm6. The low side's rounds are the same as the ++## high side's, except no rcon and no rotation. ++## ++.align 4 ++Lschedule_256: ++ li r0, 7 # mov \$7, %esi ++ addi $inp, $inp, 8 ++ lvx v0, 0, $inp # vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) ++ ?vperm v0, v6, v0, $inpperm ++ bl _vpaes_schedule_transform # input transform ++ mtctr r0 ++ ++Loop_schedule_256: ++ bl _vpaes_schedule_mangle # output low result ++ vmr v6, v0 # vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 ++ ++ # high round ++ bl _vpaes_schedule_round ++ bdz Lschedule_mangle_last # dec %esi ++ bl _vpaes_schedule_mangle ++ ++ # low round. swap xmm7 and xmm6 ++ ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0 ++ vmr v5, v7 # vmovdqa %xmm7, %xmm5 ++ vmr v7, v6 # vmovdqa %xmm6, %xmm7 ++ bl _vpaes_schedule_low_round ++ vmr v7, v5 # vmovdqa %xmm5, %xmm7 ++ ++ b Loop_schedule_256 ++## ++## .aes_schedule_mangle_last ++## ++## Mangler for last round of key schedule ++## Mangles %xmm0 ++## when encrypting, outputs out(%xmm0) ^ 63 ++## when decrypting, outputs unskew(%xmm0) ++## ++## Always called right before return... 
jumps to cleanup and exits ++## ++.align 4 ++Lschedule_mangle_last: ++ # schedule last round key from xmm0 ++ li r11, 0x2e0 # lea .Lk_deskew(%rip),%r11 ++ li r9, 0x2f0 ++ bne $dir, Lschedule_mangle_last_dec ++ ++ # encrypting ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10),%xmm1 ++ li r11, 0x2c0 # lea .Lk_opt(%rip), %r11 # prepare to output transform ++ li r9, 0x2d0 # prepare to output transform ++ vperm v0, v0, v0, v1 # vpshufb %xmm1, %xmm0, %xmm0 # output permute ++ ++ lvx $iptlo, r11, r12 # reload $ipt ++ lvx $ipthi, r9, r12 ++ addi $out, $out, 16 # add \$16, %rdx ++ vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0 ++ bl _vpaes_schedule_transform # output transform ++ ++ #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v2, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v2, 0, $out ++ ++ addi $out, $out, 15 # 15 is not typo ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ b Lschedule_mangle_done ++ ++.align 4 ++Lschedule_mangle_last_dec: ++ lvx $iptlo, r11, r12 # reload $ipt ++ lvx $ipthi, r9, r12 ++ addi $out, $out, -16 # add \$-16, %rdx ++ vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0 ++ bl _vpaes_schedule_transform # output transform ++ ++ #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key ++ vperm v0, v0, v0, $outperm # rotate right/left ++ vsel v2, $outhead, v0, $outmask ++ vmr $outhead, v0 ++ stvx v2, 0, $out ++ ++ addi $out, $out, -15 # -15 is not typo ++ lvx v1, 0, $out # redundant in aligned case ++ vsel v1, $outhead, v1, $outmask ++ stvx v1, 0, $out ++ ++Lschedule_mangle_done: ++ mtlr r7 ++ # cleanup ++ vxor v0, v0, v0 # vpxor %xmm0, %xmm0, %xmm0 ++ vxor v1, v1, v1 # vpxor %xmm1, %xmm1, %xmm1 ++ vxor v2, v2, v2 # vpxor %xmm2, %xmm2, %xmm2 ++ vxor v3, v3, v3 # vpxor %xmm3, %xmm3, %xmm3 ++ vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4 ++ vxor v5, v5, v5 # vpxor %xmm5, %xmm5, %xmm5 ++ vxor v6, v6, v6 # vpxor %xmm6, %xmm6, %xmm6 ++ vxor v7, v7, v7 # vpxor %xmm7, %xmm7, %xmm7 ++ ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## .aes_schedule_192_smear ++## ++## Smear the short, low side in the 192-bit key schedule. ++## ++## Inputs: ++## %xmm7: high side, b a x y ++## %xmm6: low side, d c 0 0 ++## %xmm13: 0 ++## ++## Outputs: ++## %xmm6: b+c+d b+c 0 0 ++## %xmm0: b+c+d b+c b a ++## ++.align 4 ++_vpaes_schedule_192_smear: ++ ?vspltw v0, v7, 3 ++ ?vsldoi v1, v9, v6, 12 # vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 ++ ?vsldoi v0, v7, v0, 8 # vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a ++ vxor v6, v6, v1 # vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 ++ vxor v6, v6, v0 # vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a ++ vmr v0, v6 ++ ?vsldoi v6, v6, v9, 8 ++ ?vsldoi v6, v9, v6, 8 # clobber low side with zeros ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## .aes_schedule_round ++## ++## Runs one main round of the key schedule on %xmm0, %xmm7 ++## ++## Specifically, runs subbytes on the high dword of %xmm0 ++## then rotates it by one byte and xors into the low dword of ++## %xmm7. ++## ++## Adds rcon from low byte of %xmm8, then rotates %xmm8 for ++## next rcon. ++## ++## Smears the dwords of %xmm7 by xoring the low into the ++## second low, result into third, result into highest. ++## ++## Returns results in %xmm7 = %xmm0. ++## Clobbers %xmm1-%xmm4, %r11. 
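++## (In FIPS-197 terms this is, roughly, temp = SubWord(RotWord(w[i-1]))
++## xor Rcon, with the smear realizing the running xor chain that turns
++## temp into the next four words of the expanded key.)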
++## ++.align 4 ++_vpaes_schedule_round: ++ # extract rcon from xmm8 ++ #vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4 ++ ?vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1 ++ ?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8 ++ vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7 ++ ++ # rotate ++ ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0 ++ ?vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0 ++ ++ # fall through... ++ ++ # low round: same as high round, but no rotation and no rcon. ++_vpaes_schedule_low_round: ++ # smear xmm7 ++ ?vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1 ++ vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7 ++ vspltisb v1, 0x0f # 0x0f..0f ++ ?vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4 ++ ++ # subbytes ++ vand v1, v1, v0 # vpand %xmm9, %xmm0, %xmm1 # 0 = k ++ vsrb v0, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i ++ vxor v7, v7, v4 # vpxor %xmm4, %xmm7, %xmm7 ++ vperm v2, $invhi, v9, v1 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ vxor v1, v1, v0 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vperm v3, $invlo, v9, v0 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vperm v4, $invlo, v9, v1 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vxor v7, v7, v26 # vpxor .Lk_s63(%rip), %xmm7, %xmm7 ++ vperm v3, $invlo, v9, v3 # vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak ++ vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vperm v2, $invlo, v9, v4 # vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak ++ vxor v3, v3, v1 # vpxor %xmm1, %xmm3, %xmm3 # 2 = io ++ vxor v2, v2, v0 # vpxor %xmm0, %xmm2, %xmm2 # 3 = jo ++ vperm v4, v15, v9, v3 # vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou ++ vperm v1, v14, v9, v2 # vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t ++ vxor v1, v1, v4 # vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output ++ ++ # add in smeared stuff ++ vxor v0, v1, v7 # vpxor %xmm7, %xmm1, %xmm0 ++ vxor v7, v1, v7 # vmovdqa %xmm0, %xmm7 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## .aes_schedule_transform ++## ++## Linear-transform %xmm0 according to tables at (%r11) ++## ++## Requires that %xmm9 = 0x0F0F... as in preheat ++## Output in %xmm0 ++## Clobbers %xmm2 ++## ++.align 4 ++_vpaes_schedule_transform: ++ #vand v1, v0, v9 # vpand %xmm9, %xmm0, %xmm1 ++ vsrb v2, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 ++ # vmovdqa (%r11), %xmm2 # lo ++ vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2 ++ # vmovdqa 16(%r11), %xmm1 # hi ++ vperm v2, $ipthi, $ipthi, v2 # vpshufb %xmm0, %xmm1, %xmm0 ++ vxor v0, v0, v2 # vpxor %xmm2, %xmm0, %xmm0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++## ++## .aes_schedule_mangle ++## ++## Mangle xmm0 from (basis-transformed) standard version ++## to our version. 
++## ++## On encrypt, ++## xor with 0x63 ++## multiply by circulant 0,1,1,1 ++## apply shiftrows transform ++## ++## On decrypt, ++## xor with 0x63 ++## multiply by "inverse mixcolumns" circulant E,B,D,9 ++## deskew ++## apply shiftrows transform ++## ++## ++## Writes out to (%rdx), and increments or decrements it ++## Keeps track of round number mod 4 in %r8 ++## Preserves xmm0 ++## Clobbers xmm1-xmm5 ++## ++.align 4 ++_vpaes_schedule_mangle: ++ #vmr v4, v0 # vmovdqa %xmm0, %xmm4 # save xmm0 for later ++ # vmovdqa .Lk_mc_forward(%rip),%xmm5 ++ bne $dir, Lschedule_mangle_dec ++ ++ # encrypting ++ vxor v4, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm4 ++ addi $out, $out, 16 # add \$16, %rdx ++ vperm v4, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm4 ++ vperm v1, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm1 ++ vperm v3, v1, v1, v25 # vpshufb %xmm5, %xmm1, %xmm3 ++ vxor v4, v4, v1 # vpxor %xmm1, %xmm4, %xmm4 ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 ++ vxor v3, v3, v4 # vpxor %xmm4, %xmm3, %xmm3 ++ ++ vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ addi r8, r8, -16 # add \$-16, %r8 ++ andi. r8, r8, 0x30 # and \$0x30, %r8 ++ ++ #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx) ++ vperm v1, v3, v3, $outperm # rotate right/left ++ vsel v2, $outhead, v1, $outmask ++ vmr $outhead, v1 ++ stvx v2, 0, $out ++ blr ++ ++.align 4 ++Lschedule_mangle_dec: ++ # inverse mix columns ++ # lea .Lk_dksd(%rip),%r11 ++ vsrb v1, v0, v8 # vpsrlb \$4, %xmm4, %xmm1 # 1 = hi ++ #and v4, v0, v9 # vpand %xmm9, %xmm4, %xmm4 # 4 = lo ++ ++ # vmovdqa 0x00(%r11), %xmm2 ++ vperm v2, v16, v16, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ # vmovdqa 0x10(%r11), %xmm3 ++ vperm v3, v17, v17, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 ++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 ++ ++ # vmovdqa 0x20(%r11), %xmm2 ++ vperm v2, v18, v18, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 ++ # vmovdqa 0x30(%r11), %xmm3 ++ vperm v3, v19, v19, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 ++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 ++ ++ # vmovdqa 0x40(%r11), %xmm2 ++ vperm v2, v20, v20, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 ++ # vmovdqa 0x50(%r11), %xmm3 ++ vperm v3, v21, v21, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 ++ ++ # vmovdqa 0x60(%r11), %xmm2 ++ vperm v2, v22, v22, v0 # vpshufb %xmm4, %xmm2, %xmm2 ++ vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3 ++ # vmovdqa 0x70(%r11), %xmm4 ++ vperm v4, v23, v23, v1 # vpshufb %xmm1, %xmm4, %xmm4 ++ lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1 ++ vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2 ++ vxor v3, v4, v2 # vpxor %xmm2, %xmm4, %xmm3 ++ ++ addi $out, $out, -16 # add \$-16, %rdx ++ ++ vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3 ++ addi r8, r8, -16 # add \$-16, %r8 ++ andi. 
r8, r8, 0x30 # and \$0x30, %r8 ++ ++ #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx) ++ vperm v1, v3, v3, $outperm # rotate right/left ++ vsel v2, $outhead, v1, $outmask ++ vmr $outhead, v1 ++ stvx v2, 0, $out ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ ++.globl .vpaes_set_encrypt_key ++.align 5 ++.vpaes_set_encrypt_key: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r0 ++ mfspr r6, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r6,`$FRAME-4`($sp) # save vrsave ++ li r7, -1 ++ $PUSH r0, `$FRAME+$LRSAVE`($sp) ++ mtspr 256, r7 # preserve all AltiVec registers ++ ++ srwi r9, $bits, 5 # shr \$5,%eax ++ addi r9, r9, 6 # add \$5,%eax ++ stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ ++ cmplw $dir, $bits, $bits # set encrypt direction ++ li r8, 0x30 # mov \$0x30,%r8d ++ bl _vpaes_schedule_core ++ ++ $POP r0, `$FRAME+$LRSAVE`($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtspr 256, r6 # restore vrsave ++ mtlr r0 ++ xor r3, r3, r3 ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key ++ ++.globl .vpaes_set_decrypt_key ++.align 4 ++.vpaes_set_decrypt_key: ++ $STU $sp,-$FRAME($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mflr r0 ++ mfspr r6, 256 # save vrsave ++ stvx v20,r10,$sp ++ addi r10,r10,32 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ stw r6,`$FRAME-4`($sp) # save vrsave ++ li r7, -1 ++ $PUSH r0, `$FRAME+$LRSAVE`($sp) ++ mtspr 256, r7 # preserve all AltiVec registers ++ ++ srwi r9, $bits, 5 # shr \$5,%eax ++ addi r9, r9, 6 # add \$5,%eax ++ stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ ++ slwi r9, r9, 4 # shl \$4,%eax ++ add $out, $out, r9 # lea (%rdx,%rax),%rdx ++ ++ cmplwi $dir, $bits, 0 # set decrypt direction ++ srwi r8, $bits, 1 # shr \$1,%r8d ++ andi. 
r8, r8, 32 # and \$32,%r8d ++ xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32 ++ bl _vpaes_schedule_core ++ ++ $POP r0, `$FRAME+$LRSAVE`($sp) ++ li r10,`15+6*$SIZE_T` ++ li r11,`31+6*$SIZE_T` ++ mtspr 256, r6 # restore vrsave ++ mtlr r0 ++ xor r3, r3, r3 ++ lvx v20,r10,$sp ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ addi $sp,$sp,$FRAME ++ blr ++ .long 0 ++ .byte 0,12,0x04,1,0x80,0,3,0 ++ .long 0 ++.size .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key ++___ ++} ++ ++my $consts=1; ++foreach (split("\n",$code)) { ++ s/\`([^\`]*)\`/eval $1/geo; ++ ++ # constants table endian-specific conversion ++ if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) { ++ my $conv=$2; ++ my @bytes=(); ++ ++ # convert to endian-agnostic format ++ foreach (split(/,\s+/,$1)) { ++ my $l = /^0/?oct:int; ++ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; ++ } ++ ++ # little-endian conversion ++ if ($flavour =~ /le$/o) { ++ SWITCH: for($conv) { ++ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; ++ /\?rev/ && do { @bytes=reverse(@bytes); last; }; ++ } ++ } ++ ++ #emit ++ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; ++ next; ++ } ++ $consts=0 if (m/Lconsts:/o); # end of table ++ ++ # instructions prefixed with '?' are endian-specific and need ++ # to be adjusted accordingly... ++ if ($flavour =~ /le$/o) { # little-endian ++ s/\?lvsr/lvsl/o or ++ s/\?lvsl/lvsr/o or ++ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or ++ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or ++ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; ++ } else { # big-endian ++ s/\?([a-z]+)/$1/o; ++ } ++ ++ print $_,"\n"; ++} ++ ++close STDOUT; +diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl +index 41f2e46..bd7f45b 100644 +--- a/crypto/aes/asm/vpaes-x86_64.pl ++++ b/crypto/aes/asm/vpaes-x86_64.pl +@@ -1060,7 +1060,7 @@ _vpaes_consts: + .Lk_dsbo: # decryption sbox final output + .quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D + .quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +-.asciz "Vector Permutaion AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" ++.asciz "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)" + .align 64 + .size _vpaes_consts,.-_vpaes_consts + ___ +diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl +index f9b6992..da69c6a 100644 +--- a/crypto/bn/asm/ppc-mont.pl ++++ b/crypto/bn/asm/ppc-mont.pl +@@ -325,6 +325,7 @@ Lcopy: ; copy or in-place refresh + .long 0 + .byte 0,12,4,0,0x80,12,6,0 + .long 0 ++.size .bn_mul_mont_int,.-.bn_mul_mont_int + + .asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by " + ___ +diff --git a/crypto/bn/asm/ppc.pl b/crypto/bn/asm/ppc.pl +index 1249ce2..04df1fe 100644 +--- a/crypto/bn/asm/ppc.pl ++++ b/crypto/bn/asm/ppc.pl +@@ -392,6 +392,7 @@ $data=< for the OpenSSL ++# Written by Andy Polyakov for the OpenSSL + # project. The module is, however, dual licensed under OpenSSL and + # CRYPTOGAMS licenses depending on where you obtain it. For further + # details see http://www.openssl.org/~appro/cryptogams/. 
+@@ -65,6 +65,14 @@ + # others alternative would be to break dependence on upper halves of + # GPRs by sticking to 32-bit integer operations... + ++# December 2012 ++ ++# Remove above mentioned dependence on GPRs' upper halves in 32-bit ++# build. No signal masking overhead, but integer instructions are ++# *more* numerous... It's still "universally" faster than 32-bit ++# ppc-mont.pl, but improvement coefficient is not as impressive ++# for longer keys... ++ + $flavour = shift; + + if ($flavour =~ /32/) { +@@ -110,6 +118,9 @@ $tp="r10"; + $j="r11"; + $i="r12"; + # non-volatile registers ++$c1="r19"; ++$n1="r20"; ++$a1="r21"; + $nap_d="r22"; # interleaved ap and np in double format + $a0="r23"; # ap[0] + $t0="r24"; # temporary registers +@@ -180,8 +191,8 @@ $T3a="f30"; $T3b="f31"; + # . . + # +-------------------------------+ + # . . +-# -12*size_t +-------------------------------+ +-# | 10 saved gpr, r22-r31 | ++# -13*size_t +-------------------------------+ ++# | 13 saved gpr, r19-r31 | + # . . + # . . + # -12*8 +-------------------------------+ +@@ -215,6 +226,9 @@ $code=<<___; + mr $i,$sp + $STUX $sp,$sp,$tp ; alloca + ++ $PUSH r19,`-12*8-13*$SIZE_T`($i) ++ $PUSH r20,`-12*8-12*$SIZE_T`($i) ++ $PUSH r21,`-12*8-11*$SIZE_T`($i) + $PUSH r22,`-12*8-10*$SIZE_T`($i) + $PUSH r23,`-12*8-9*$SIZE_T`($i) + $PUSH r24,`-12*8-8*$SIZE_T`($i) +@@ -237,40 +251,26 @@ $code=<<___; + stfd f29,`-3*8`($i) + stfd f30,`-2*8`($i) + stfd f31,`-1*8`($i) +-___ +-$code.=<<___ if ($SIZE_T==8); +- ld $a0,0($ap) ; pull ap[0] value +- ld $n0,0($n0) ; pull n0[0] value +- ld $t3,0($bp) ; bp[0] +-___ +-$code.=<<___ if ($SIZE_T==4); +- mr $t1,$n0 +- lwz $a0,0($ap) ; pull ap[0,1] value +- lwz $t0,4($ap) +- lwz $n0,0($t1) ; pull n0[0,1] value +- lwz $t1,4($t1) +- lwz $t3,0($bp) ; bp[0,1] +- lwz $t2,4($bp) +- insrdi $a0,$t0,32,0 +- insrdi $n0,$t1,32,0 +- insrdi $t3,$t2,32,0 +-___ +-$code.=<<___; ++ + addi $tp,$sp,`$FRAME+$TRANSFER+8+64` + li $i,-64 + add $nap_d,$tp,$num + and $nap_d,$nap_d,$i ; align to 64 bytes +- +- mulld $t7,$a0,$t3 ; ap[0]*bp[0] + ; nap_d is off by 1, because it's used with stfdu/lfdu + addi $nap_d,$nap_d,-8 + srwi $j,$num,`3+1` ; counter register, num/2 +- mulld $t7,$t7,$n0 ; tp[0]*n0 + addi $j,$j,-1 + addi $tp,$sp,`$FRAME+$TRANSFER-8` + li $carry,0 + mtctr $j ++___ ++ ++$code.=<<___ if ($SIZE_T==8); ++ ld $a0,0($ap) ; pull ap[0] value ++ ld $t3,0($bp) ; bp[0] ++ ld $n0,0($n0) ; pull n0[0] value + ++ mulld $t7,$a0,$t3 ; ap[0]*bp[0] + ; transfer bp[0] to FPU as 4x16-bit values + extrdi $t0,$t3,16,48 + extrdi $t1,$t3,16,32 +@@ -280,6 +280,8 @@ $code.=<<___; + std $t1,`$FRAME+8`($sp) + std $t2,`$FRAME+16`($sp) + std $t3,`$FRAME+24`($sp) ++ ++ mulld $t7,$t7,$n0 ; tp[0]*n0 + ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values + extrdi $t4,$t7,16,48 + extrdi $t5,$t7,16,32 +@@ -289,21 +291,61 @@ $code.=<<___; + std $t5,`$FRAME+40`($sp) + std $t6,`$FRAME+48`($sp) + std $t7,`$FRAME+56`($sp) +-___ +-$code.=<<___ if ($SIZE_T==8); +- lwz $t0,4($ap) ; load a[j] as 32-bit word pair +- lwz $t1,0($ap) +- lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair ++ ++ extrdi $t0,$a0,32,32 ; lwz $t0,4($ap) ++ extrdi $t1,$a0,32,0 ; lwz $t1,0($ap) ++ lwz $t2,12($ap) ; load a[1] as 32-bit word pair + lwz $t3,8($ap) +- lwz $t4,4($np) ; load n[j] as 32-bit word pair ++ lwz $t4,4($np) ; load n[0] as 32-bit word pair + lwz $t5,0($np) +- lwz $t6,12($np) ; load n[j+1] as 32-bit word pair ++ lwz $t6,12($np) ; load n[1] as 32-bit word pair + lwz $t7,8($np) + ___ + $code.=<<___ if ($SIZE_T==4); +- lwz $t0,0($ap) ; load a[j..j+3] as 32-bit 
word pairs +- lwz $t1,4($ap) +- lwz $t2,8($ap) ++ lwz $a0,0($ap) ; pull ap[0,1] value ++ mr $n1,$n0 ++ lwz $a1,4($ap) ++ li $c1,0 ++ lwz $t1,0($bp) ; bp[0,1] ++ lwz $t3,4($bp) ++ lwz $n0,0($n1) ; pull n0[0,1] value ++ lwz $n1,4($n1) ++ ++ mullw $t4,$a0,$t1 ; mulld ap[0]*bp[0] ++ mulhwu $t5,$a0,$t1 ++ mullw $t6,$a1,$t1 ++ mullw $t7,$a0,$t3 ++ add $t5,$t5,$t6 ++ add $t5,$t5,$t7 ++ ; transfer bp[0] to FPU as 4x16-bit values ++ extrwi $t0,$t1,16,16 ++ extrwi $t1,$t1,16,0 ++ extrwi $t2,$t3,16,16 ++ extrwi $t3,$t3,16,0 ++ std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build ++ std $t1,`$FRAME+8`($sp) ++ std $t2,`$FRAME+16`($sp) ++ std $t3,`$FRAME+24`($sp) ++ ++ mullw $t0,$t4,$n0 ; mulld tp[0]*n0 ++ mulhwu $t1,$t4,$n0 ++ mullw $t2,$t5,$n0 ++ mullw $t3,$t4,$n1 ++ add $t1,$t1,$t2 ++ add $t1,$t1,$t3 ++ ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values ++ extrwi $t4,$t0,16,16 ++ extrwi $t5,$t0,16,0 ++ extrwi $t6,$t1,16,16 ++ extrwi $t7,$t1,16,0 ++ std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build ++ std $t5,`$FRAME+40`($sp) ++ std $t6,`$FRAME+48`($sp) ++ std $t7,`$FRAME+56`($sp) ++ ++ mr $t0,$a0 ; lwz $t0,0($ap) ++ mr $t1,$a1 ; lwz $t1,4($ap) ++ lwz $t2,8($ap) ; load a[j..j+3] as 32-bit word pairs + lwz $t3,12($ap) + lwz $t4,0($np) ; load n[j..j+3] as 32-bit word pairs + lwz $t5,4($np) +@@ -319,7 +361,7 @@ $code.=<<___; + lfd $nb,`$FRAME+40`($sp) + lfd $nc,`$FRAME+48`($sp) + lfd $nd,`$FRAME+56`($sp) +- std $t0,`$FRAME+64`($sp) ++ std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build + std $t1,`$FRAME+72`($sp) + std $t2,`$FRAME+80`($sp) + std $t3,`$FRAME+88`($sp) +@@ -441,7 +483,7 @@ $code.=<<___ if ($SIZE_T==4); + lwz $t7,12($np) + ___ + $code.=<<___; +- std $t0,`$FRAME+64`($sp) ++ std $t0,`$FRAME+64`($sp) ; yes, std even in 32-bit build + std $t1,`$FRAME+72`($sp) + std $t2,`$FRAME+80`($sp) + std $t3,`$FRAME+88`($sp) +@@ -449,6 +491,9 @@ $code.=<<___; + std $t5,`$FRAME+104`($sp) + std $t6,`$FRAME+112`($sp) + std $t7,`$FRAME+120`($sp) ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + ld $t0,`$FRAME+0`($sp) + ld $t1,`$FRAME+8`($sp) + ld $t2,`$FRAME+16`($sp) +@@ -457,6 +502,20 @@ $code.=<<___; + ld $t5,`$FRAME+40`($sp) + ld $t6,`$FRAME+48`($sp) + ld $t7,`$FRAME+56`($sp) ++___ ++} else { ++$code.=<<___; ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++___ ++} ++$code.=<<___; + lfd $A0,`$FRAME+64`($sp) + lfd $A1,`$FRAME+72`($sp) + lfd $A2,`$FRAME+80`($sp) +@@ -488,7 +547,9 @@ $code.=<<___; + fmadd $T0b,$A0,$bb,$dotb + stfd $A2,24($nap_d) ; save a[j+1] in double format + stfd $A3,32($nap_d) +- ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + fmadd $T1a,$A0,$bc,$T1a + fmadd $T1b,$A0,$bd,$T1b + fmadd $T2a,$A1,$bc,$T2a +@@ -561,11 +622,123 @@ $code.=<<___; + stfd $T3b,`$FRAME+56`($sp) + std $t0,8($tp) ; tp[j-1] + stdu $t4,16($tp) ; tp[j] ++___ ++} else { ++$code.=<<___; ++ fmadd $T1a,$A0,$bc,$T1a ++ fmadd $T1b,$A0,$bd,$T1b ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ fmadd $T2a,$A1,$bc,$T2a ++ fmadd $T2b,$A1,$bd,$T2b ++ stfd $N0,40($nap_d) ; save n[j] in double format ++ stfd $N1,48($nap_d) ++ srwi $c1,$t1,16 ++ insrwi $carry,$t1,16,0 ++ fmadd $T3a,$A2,$bc,$T3a ++ fmadd $T3b,$A2,$bd,$T3b ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ fmul $dota,$A3,$bc ++ fmul $dotb,$A3,$bd ++ stfd $N2,56($nap_d) ; save n[j+1] in double format ++ stfdu $N3,64($nap_d) ++ 
insrwi $t0,$t2,16,0 ; 0..31 bits ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ ++ fmadd $T1a,$N1,$na,$T1a ++ fmadd $T1b,$N1,$nb,$T1b ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ fmadd $T2a,$N2,$na,$T2a ++ fmadd $T2b,$N2,$nb,$T2b ++ srwi $c1,$t5,16 ++ insrwi $carry,$t5,16,0 ++ fmadd $T3a,$N3,$na,$T3a ++ fmadd $T3b,$N3,$nb,$T3b ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ fmadd $T0a,$N0,$na,$T0a ++ fmadd $T0b,$N0,$nb,$T0b ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ srwi $c1,$t7,16 ++ insrwi $carry,$t7,16,0 ++ ++ fmadd $T1a,$N0,$nc,$T1a ++ fmadd $T1b,$N0,$nd,$T1b ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ fmadd $T2a,$N1,$nc,$T2a ++ fmadd $T2b,$N1,$nd,$T2b ++ stw $t0,12($tp) ; tp[j-1] ++ stw $t4,8($tp) ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ fmadd $T3a,$N2,$nc,$T3a ++ fmadd $T3b,$N2,$nd,$T3b ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ fmadd $dota,$N3,$nc,$dota ++ fmadd $dotb,$N3,$nd,$dotb ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ srwi $c1,$t7,16 ++ insrwi $carry,$t7,16,0 ++ ++ fctid $T0a,$T0a ++ fctid $T0b,$T0b ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ fctid $T1a,$T1a ++ fctid $T1b,$T1b ++ srwi $c1,$t1,16 ++ insrwi $carry,$t1,16,0 ++ fctid $T2a,$T2a ++ fctid $T2b,$T2b ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ fctid $T3a,$T3a ++ fctid $T3b,$T3b ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ srwi $c1,$t5,16 ++ insrwi $carry,$t5,16,0 ++ ++ stfd $T0a,`$FRAME+0`($sp) ++ stfd $T0b,`$FRAME+8`($sp) ++ stfd $T1a,`$FRAME+16`($sp) ++ stfd $T1b,`$FRAME+24`($sp) ++ stfd $T2a,`$FRAME+32`($sp) ++ stfd $T2b,`$FRAME+40`($sp) ++ stfd $T3a,`$FRAME+48`($sp) ++ stfd $T3b,`$FRAME+56`($sp) ++ stw $t2,20($tp) ; tp[j] ++ stwu $t0,16($tp) ++___ ++} ++$code.=<<___; + bdnz- L1st + + fctid $dota,$dota + fctid $dotb,$dotb +- ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + ld $t0,`$FRAME+0`($sp) + ld $t1,`$FRAME+8`($sp) + ld $t2,`$FRAME+16`($sp) +@@ -611,33 +784,117 @@ $code.=<<___; + insrdi $t6,$t7,48,0 + srdi $ovf,$t7,48 + std $t6,8($tp) ; tp[num-1] ++___ ++} else { ++$code.=<<___; ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++ stfd $dota,`$FRAME+64`($sp) ++ stfd $dotb,`$FRAME+72`($sp) + ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ stw $t0,12($tp) ; tp[j-1] ++ stw $t4,8($tp) ++ ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 
++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4 ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ stw $t2,20($tp) ; tp[j] ++ stwu $t0,16($tp) ++ ++ lwz $t7,`$FRAME+64`($sp) ++ lwz $t6,`$FRAME+68`($sp) ++ lwz $t5,`$FRAME+72`($sp) ++ lwz $t4,`$FRAME+76`($sp) ++ ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ ++ insrwi $t6,$t4,16,0 ++ srwi $t4,$t4,16 ++ insrwi $t4,$t5,16,0 ++ srwi $ovf,$t5,16 ++ stw $t6,12($tp) ; tp[num-1] ++ stw $t4,8($tp) ++___ ++} ++$code.=<<___; + slwi $t7,$num,2 + subf $nap_d,$t7,$nap_d ; rewind pointer + + li $i,8 ; i=1 + .align 5 + Louter: +-___ +-$code.=<<___ if ($SIZE_T==8); +- ldx $t3,$bp,$i ; bp[i] +-___ +-$code.=<<___ if ($SIZE_T==4); +- add $t0,$bp,$i +- lwz $t3,0($t0) ; bp[i,i+1] +- lwz $t0,4($t0) +- insrdi $t3,$t0,32,0 +-___ +-$code.=<<___; +- ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] +- mulld $t7,$a0,$t3 ; ap[0]*bp[i] +- + addi $tp,$sp,`$FRAME+$TRANSFER` +- add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0] + li $carry,0 +- mulld $t7,$t7,$n0 ; tp[0]*n0 + mtctr $j ++___ ++$code.=<<___ if ($SIZE_T==8); ++ ldx $t3,$bp,$i ; bp[i] + ++ ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0] ++ mulld $t7,$a0,$t3 ; ap[0]*bp[i] ++ add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0] + ; transfer bp[i] to FPU as 4x16-bit values + extrdi $t0,$t3,16,48 + extrdi $t1,$t3,16,32 +@@ -647,6 +904,8 @@ $code.=<<___; + std $t1,`$FRAME+8`($sp) + std $t2,`$FRAME+16`($sp) + std $t3,`$FRAME+24`($sp) ++ ++ mulld $t7,$t7,$n0 ; tp[0]*n0 + ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values + extrdi $t4,$t7,16,48 + extrdi $t5,$t7,16,32 +@@ -656,7 +915,50 @@ $code.=<<___; + std $t5,`$FRAME+40`($sp) + std $t6,`$FRAME+48`($sp) + std $t7,`$FRAME+56`($sp) ++___ ++$code.=<<___ if ($SIZE_T==4); ++ add $t0,$bp,$i ++ li $c1,0 ++ lwz $t1,0($t0) ; bp[i,i+1] ++ lwz $t3,4($t0) ++ ++ mullw $t4,$a0,$t1 ; ap[0]*bp[i] ++ lwz $t0,`$FRAME+$TRANSFER+8+4`($sp) ; tp[0] ++ mulhwu $t5,$a0,$t1 ++ lwz $t2,`$FRAME+$TRANSFER+8`($sp) ; tp[0] ++ mullw $t6,$a1,$t1 ++ mullw $t7,$a0,$t3 ++ add $t5,$t5,$t6 ++ add $t5,$t5,$t7 ++ addc $t4,$t4,$t0 ; ap[0]*bp[i]+tp[0] ++ adde $t5,$t5,$t2 ++ ; transfer bp[i] to FPU as 4x16-bit values ++ extrwi $t0,$t1,16,16 ++ extrwi $t1,$t1,16,0 ++ extrwi $t2,$t3,16,16 ++ extrwi $t3,$t3,16,0 ++ std $t0,`$FRAME+0`($sp) ; yes, std in 32-bit build ++ std $t1,`$FRAME+8`($sp) ++ std $t2,`$FRAME+16`($sp) ++ std $t3,`$FRAME+24`($sp) + ++ mullw $t0,$t4,$n0 ; mulld tp[0]*n0 ++ mulhwu $t1,$t4,$n0 ++ mullw $t2,$t5,$n0 ++ mullw $t3,$t4,$n1 ++ add $t1,$t1,$t2 ++ add $t1,$t1,$t3 ++ ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values ++ extrwi $t4,$t0,16,16 ++ extrwi $t5,$t0,16,0 ++ extrwi $t6,$t1,16,16 ++ extrwi $t7,$t1,16,0 ++ std $t4,`$FRAME+32`($sp) ; yes, std in 32-bit build ++ std $t5,`$FRAME+40`($sp) ++ std $t6,`$FRAME+48`($sp) ++ std $t7,`$FRAME+56`($sp) ++___ ++$code.=<<___; + lfd $A0,8($nap_d) ; load a[j] in double format + lfd $A1,16($nap_d) + lfd $A2,24($nap_d) ; load a[j+1] in 
double format +@@ -769,7 +1071,9 @@ Linner: + fmul $dotb,$A3,$bd + lfd $A2,24($nap_d) ; load a[j+1] in double format + lfd $A3,32($nap_d) +- ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + fmadd $T1a,$N1,$na,$T1a + fmadd $T1b,$N1,$nb,$T1b + ld $t0,`$FRAME+0`($sp) +@@ -856,10 +1160,131 @@ $code.=<<___; + addze $carry,$carry + std $t3,-16($tp) ; tp[j-1] + std $t5,-8($tp) ; tp[j] ++___ ++} else { ++$code.=<<___; ++ fmadd $T1a,$N1,$na,$T1a ++ fmadd $T1b,$N1,$nb,$T1b ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ fmadd $T2a,$N2,$na,$T2a ++ fmadd $T2b,$N2,$nb,$T2b ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ fmadd $T3a,$N3,$na,$T3a ++ fmadd $T3b,$N3,$nb,$T3b ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ fmadd $T0a,$N0,$na,$T0a ++ fmadd $T0b,$N0,$nb,$T0b ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++ srwi $c1,$t1,16 ++ insrwi $carry,$t1,16,0 ++ ++ fmadd $T1a,$N0,$nc,$T1a ++ fmadd $T1b,$N0,$nd,$T1b ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ fmadd $T2a,$N1,$nc,$T2a ++ fmadd $T2b,$N1,$nd,$T2b ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ fmadd $T3a,$N2,$nc,$T3a ++ fmadd $T3b,$N2,$nd,$T3b ++ lwz $t2,12($tp) ; tp[j] ++ lwz $t3,8($tp) ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ fmadd $dota,$N3,$nc,$dota ++ fmadd $dotb,$N3,$nd,$dotb ++ srwi $c1,$t5,16 ++ insrwi $carry,$t5,16,0 ++ ++ fctid $T0a,$T0a ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ fctid $T0b,$T0b ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ srwi $c1,$t7,16 ++ insrwi $carry,$t7,16,0 ++ fctid $T1a,$T1a ++ addc $t0,$t0,$t2 ++ adde $t4,$t4,$t3 ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ fctid $T1b,$T1b ++ addze $carry,$carry ++ addze $c1,$c1 ++ stw $t0,4($tp) ; tp[j-1] ++ stw $t4,0($tp) ++ fctid $T2a,$T2a ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ fctid $T2b,$T2b ++ srwi $c1,$t3,16 ++ insrwi $carry,$t3,16,0 ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4 ++ fctid $T3a,$T3a ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ fctid $T3b,$T3b ++ ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ lwz $t6,20($tp) ++ lwzu $t7,16($tp) ++ addc $t0,$t0,$carry ++ stfd $T0a,`$FRAME+0`($sp) ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ stfd $T0b,`$FRAME+8`($sp) ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t4,$t4,$carry ++ stfd $T1a,`$FRAME+16`($sp) ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ stfd $T1b,`$FRAME+24`($sp) ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ ++ addc $t2,$t2,$t6 ++ stfd $T2a,`$FRAME+32`($sp) ++ adde $t0,$t0,$t7 ++ stfd $T2b,`$FRAME+40`($sp) ++ addze $carry,$carry ++ stfd $T3a,`$FRAME+48`($sp) ++ addze $c1,$c1 ++ stfd $T3b,`$FRAME+56`($sp) ++ stw $t2,-4($tp) ; tp[j] ++ stw $t0,-8($tp) ++___ ++} ++$code.=<<___; + bdnz- Linner + + fctid $dota,$dota + fctid $dotb,$dotb ++___ ++if ($SIZE_T==8 or $flavour =~ /osx/) { ++$code.=<<___; + ld $t0,`$FRAME+0`($sp) + ld $t1,`$FRAME+8`($sp) + ld $t2,`$FRAME+16`($sp) +@@ -926,7 +1351,116 @@ $code.=<<___; + insrdi $t6,$t7,48,0 + srdi $ovf,$t7,48 + std $t6,0($tp) ; tp[num-1] ++___ ++} else { 
++$code.=<<___; ++ lwz $t1,`$FRAME+0`($sp) ++ lwz $t0,`$FRAME+4`($sp) ++ lwz $t3,`$FRAME+8`($sp) ++ lwz $t2,`$FRAME+12`($sp) ++ lwz $t5,`$FRAME+16`($sp) ++ lwz $t4,`$FRAME+20`($sp) ++ lwz $t7,`$FRAME+24`($sp) ++ lwz $t6,`$FRAME+28`($sp) ++ stfd $dota,`$FRAME+64`($sp) ++ stfd $dotb,`$FRAME+72`($sp) + ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $t0,$t2,16,0 ; 0..31 bits ++ lwz $t2,12($tp) ; tp[j] ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ lwz $t3,8($tp) ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t4,$t6,16,0 ; 32..63 bits ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ ++ addc $t0,$t0,$t2 ++ adde $t4,$t4,$t3 ++ addze $carry,$carry ++ addze $c1,$c1 ++ stw $t0,4($tp) ; tp[j-1] ++ stw $t4,0($tp) ++ ++ lwz $t3,`$FRAME+32`($sp) ; permuted $t1 ++ lwz $t2,`$FRAME+36`($sp) ; permuted $t0 ++ lwz $t7,`$FRAME+40`($sp) ; permuted $t3 ++ lwz $t6,`$FRAME+44`($sp) ; permuted $t2 ++ lwz $t1,`$FRAME+48`($sp) ; permuted $t5 ++ lwz $t0,`$FRAME+52`($sp) ; permuted $t4 ++ lwz $t5,`$FRAME+56`($sp) ; permuted $t7 ++ lwz $t4,`$FRAME+60`($sp) ; permuted $t6 ++ ++ addc $t2,$t2,$carry ++ adde $t3,$t3,$c1 ++ srwi $carry,$t2,16 ++ insrwi $carry,$t3,16,0 ++ srwi $c1,$t3,16 ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ srwi $carry,$t6,16 ++ insrwi $t2,$t6,16,0 ; 64..95 bits ++ lwz $t6,20($tp) ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ lwzu $t7,16($tp) ++ addc $t0,$t0,$carry ++ adde $t1,$t1,$c1 ++ srwi $carry,$t0,16 ++ insrwi $carry,$t1,16,0 ++ srwi $c1,$t1,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ srwi $carry,$t4,16 ++ insrwi $t0,$t4,16,0 ; 96..127 bits ++ insrwi $carry,$t5,16,0 ++ srwi $c1,$t5,16 ++ ++ addc $t2,$t2,$t6 ++ adde $t0,$t0,$t7 ++ lwz $t7,`$FRAME+64`($sp) ++ lwz $t6,`$FRAME+68`($sp) ++ addze $carry,$carry ++ addze $c1,$c1 ++ lwz $t5,`$FRAME+72`($sp) ++ lwz $t4,`$FRAME+76`($sp) ++ ++ addc $t6,$t6,$carry ++ adde $t7,$t7,$c1 ++ stw $t2,-4($tp) ; tp[j] ++ stw $t0,-8($tp) ++ addc $t6,$t6,$ovf ++ addze $t7,$t7 ++ srwi $carry,$t6,16 ++ insrwi $carry,$t7,16,0 ++ srwi $c1,$t7,16 ++ addc $t4,$t4,$carry ++ adde $t5,$t5,$c1 ++ ++ insrwi $t6,$t4,16,0 ++ srwi $t4,$t4,16 ++ insrwi $t4,$t5,16,0 ++ srwi $ovf,$t5,16 ++ stw $t6,4($tp) ; tp[num-1] ++ stw $t4,0($tp) ++___ ++} ++$code.=<<___; + slwi $t7,$num,2 + addi $i,$i,8 + subf $nap_d,$t7,$nap_d ; rewind pointer +@@ -994,14 +1528,14 @@ $code.=<<___ if ($SIZE_T==4); + mtctr $j + + .align 4 +-Lsub: ld $t0,8($tp) ; load tp[j..j+3] in 64-bit word order +- ldu $t2,16($tp) ++Lsub: lwz $t0,12($tp) ; load tp[j..j+3] in 64-bit word order ++ lwz $t1,8($tp) ++ lwz $t2,20($tp) ++ lwzu $t3,16($tp) + lwz $t4,4($np) ; load np[j..j+3] in 32-bit word order + lwz $t5,8($np) + lwz $t6,12($np) + lwzu $t7,16($np) +- extrdi $t1,$t0,32,0 +- extrdi $t3,$t2,32,0 + subfe $t4,$t4,$t0 ; tp[j]-np[j] + stw $t0,4($ap) ; save tp[j..j+3] in 32-bit word order + subfe $t5,$t5,$t1 ; tp[j+1]-np[j+1] +@@ -1052,6 +1586,9 @@ ___ + $code.=<<___; + $POP $i,0($sp) + li r3,1 ; signal "handled" ++ $POP r19,`-12*8-13*$SIZE_T`($i) ++ $POP r20,`-12*8-12*$SIZE_T`($i) ++ $POP r21,`-12*8-11*$SIZE_T`($i) + $POP r22,`-12*8-10*$SIZE_T`($i) + $POP r23,`-12*8-9*$SIZE_T`($i) + $POP r24,`-12*8-8*$SIZE_T`($i) +@@ -1077,8 +1614,9 @@ $code.=<<___; + mr $sp,$i + blr + .long 0 +- .byte 0,12,4,0,0x8c,10,6,0 ++ .byte 0,12,4,0,0x8c,13,6,0 + .long 0 
++.size .$fname,.-.$fname
+
+ .asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by "
+ ___
+diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
+index 1bfb5d9..51137fd 100644
+--- a/crypto/evp/e_aes.c
++++ b/crypto/evp/e_aes.c
+@@ -153,6 +153,20 @@ void AES_xts_decrypt(const char *inp,char *out,size_t len,
+ const unsigned char iv[16]);
+ #endif
+
++#if defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
++# include "ppc_arch.h"
++# ifdef VPAES_ASM
++# define VPAES_CAPABLE (OPENSSL_ppccap_P & PPC_ALTIVEC)
++# endif
++# define HWAES_CAPABLE (OPENSSL_ppccap_P & PPC_CRYPTO207)
++# define HWAES_set_encrypt_key aes_p8_set_encrypt_key
++# define HWAES_set_decrypt_key aes_p8_set_decrypt_key
++# define HWAES_encrypt aes_p8_encrypt
++# define HWAES_decrypt aes_p8_decrypt
++# define HWAES_cbc_encrypt aes_p8_cbc_encrypt
++# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
++#endif
++
+ #if defined(AES_ASM) && !defined(I386_ONLY) && ( \
+ ((defined(__i386) || defined(__i386__) || \
+ defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \
+diff --git a/crypto/modes/Makefile b/crypto/modes/Makefile
+index c825b12..e684e02 100644
+--- a/crypto/modes/Makefile
++++ b/crypto/modes/Makefile
+@@ -56,6 +56,10 @@ ghash-alpha.s: asm/ghash-alpha.pl
+ $(PERL) $< | $(CC) -E - | tee $@ > /dev/null
+ ghash-parisc.s: asm/ghash-parisc.pl
+ $(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
++ghashv8-armx.S: asm/ghashv8-armx.pl
++ $(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@
++ghashp8-ppc.s: asm/ghashp8-ppc.pl
++ $(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@
+
+ # GNU make "catch all"
+ ghash-%.S: asm/ghash-%.pl; $(PERL) $< $(PERLASM_SCHEME) $@
+diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl
+new file mode 100755
+index 0000000..e76a58c
+--- /dev/null
++++ b/crypto/modes/asm/ghashp8-ppc.pl
+@@ -0,0 +1,234 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# GHASH for PowerISA v2.07.
++#
++# July 2014
++#
++# Accurate performance measurements are problematic, because it's
++# always virtualized setup with possibly throttled processor.
++# Relative comparison is therefore more informative. This initial
++# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
++# faster than "4-bit" integer-only compiler-generated 64-bit code.
++# "Initial version" means that there is room for further improvement.
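For reference, the per-block operation that gcm_gmult_p8 and gcm_ghash_p8 below accelerate with vpmsumd is a multiplication in GF(2^128) by the hash key H, using GHASH's MSB-first bit order and the reduction polynomial that reappears below as the 0xe1/0xc2 constants. A minimal bit-at-a-time C sketch of that multiplication; the function name and the two-big-endian-word representation are illustrative assumptions, not part of the patch:

    #include <stdint.h>

    /* Z = X * H in GF(2^128), GHASH convention: bit 0 of a block is the
     * most significant bit of X[0], and a right shift of V folds the
     * dropped bit back in through the 0xE1 reduction constant. */
    static void gf128_mul(uint64_t Z[2], const uint64_t X[2],
                          const uint64_t H[2])
    {
        uint64_t Zh = 0, Zl = 0, Vh = H[0], Vl = H[1];
        int i;

        for (i = 0; i < 128; i++) {
            /* mask is all-ones when bit i of X (MSB first) is set */
            uint64_t mask = 0 - ((i < 64 ? X[0] >> (63 - i)
                                         : X[1] >> (127 - i)) & 1);
            uint64_t lsb = Vl & 1;

            Zh ^= Vh & mask;                /* Z ^= V for set bits of X */
            Zl ^= Vl & mask;
            Vl = (Vl >> 1) | (Vh << 63);    /* V >>= 1, then reduce     */
            Vh = (Vh >> 1) ^ ((0 - lsb) & 0xE100000000000000ULL);
        }
        Z[0] = Zh;
        Z[1] = Zl;
    }

gcm_ghash_p8 applies this per 16-byte block as Xi = (Xi ^ inp) * H; the assembly reaches the same result with three vpmsumd partial products and the two-step reduction marked "1st phase"/"2nd phase" in the code below.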
++
++$flavour=shift;
++$output =shift;
++
++if ($flavour =~ /64/) {
++ $SIZE_T=8;
++ $LRSAVE=2*$SIZE_T;
++ $STU="stdu";
++ $POP="ld";
++ $PUSH="std";
++} elsif ($flavour =~ /32/) {
++ $SIZE_T=4;
++ $LRSAVE=$SIZE_T;
++ $STU="stwu";
++ $POP="lwz";
++ $PUSH="stw";
++} else { die "nonsense $flavour"; }
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
++
++my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
++
++my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
++my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
++my $vrsave="r12";
++
++$code=<<___;
++.machine "any"
++
++.text
++
++.globl .gcm_init_p8
++.align 5
++.gcm_init_p8:
++ lis r0,0xfff0
++ li r8,0x10
++ mfspr $vrsave,256
++ li r9,0x20
++ mtspr 256,r0
++ li r10,0x30
++ lvx_u $H,0,r4 # load H
++
++ vspltisb $xC2,-16 # 0xf0
++ vspltisb $t0,1 # one
++ vaddubm $xC2,$xC2,$xC2 # 0xe0
++ vxor $zero,$zero,$zero
++ vor $xC2,$xC2,$t0 # 0xe1
++ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
++ vsldoi $t1,$zero,$t0,1 # ...1
++ vaddubm $xC2,$xC2,$xC2 # 0xc2...
++ vspltisb $t2,7
++ vor $xC2,$xC2,$t1 # 0xc2....01
++ vspltb $t1,$H,0 # most significant byte
++ vsl $H,$H,$t0 # H<<=1
++ vsrab $t1,$t1,$t2 # broadcast carry bit
++ vand $t1,$t1,$xC2
++ vxor $H,$H,$t1 # twisted H
++
++ vsldoi $H,$H,$H,8 # twist even more ...
++ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
++ vsldoi $Hl,$zero,$H,8 # ... and split
++ vsldoi $Hh,$H,$zero,8
++
++ stvx_u $xC2,0,r3 # save pre-computed table
++ stvx_u $Hl,r8,r3
++ stvx_u $H, r9,r3
++ stvx_u $Hh,r10,r3
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,2,0
++ .long 0
++.size .gcm_init_p8,.-.gcm_init_p8
++
++.globl .gcm_gmult_p8
++.align 5
++.gcm_gmult_p8:
++ lis r0,0xfff8
++ li r8,0x10
++ mfspr $vrsave,256
++ li r9,0x20
++ mtspr 256,r0
++ li r10,0x30
++ lvx_u $IN,0,$Xip # load Xi
++
++ lvx_u $Hl,r8,$Htbl # load pre-computed table
++ le?lvsl $lemask,r0,r0
++ lvx_u $H, r9,$Htbl
++ le?vspltisb $t0,0x07
++ lvx_u $Hh,r10,$Htbl
++ le?vxor $lemask,$lemask,$t0
++ lvx_u $xC2,0,$Htbl
++ le?vperm $IN,$IN,$IN,$lemask
++ vxor $zero,$zero,$zero
++
++ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
++ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
++ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
++
++ vpmsumd $t2,$Xl,$xC2 # 1st phase
++
++ vsldoi $t0,$Xm,$zero,8
++ vsldoi $t1,$zero,$Xm,8
++ vxor $Xl,$Xl,$t0
++ vxor $Xh,$Xh,$t1
++
++ vsldoi $Xl,$Xl,$Xl,8
++ vxor $Xl,$Xl,$t2
++
++ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
++ vpmsumd $Xl,$Xl,$xC2
++ vxor $t1,$t1,$Xh
++ vxor $Xl,$Xl,$t1
++
++ le?vperm $Xl,$Xl,$Xl,$lemask
++ stvx_u $Xl,0,$Xip # write out Xi
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,2,0
++ .long 0
++.size .gcm_gmult_p8,.-.gcm_gmult_p8
++
++.globl .gcm_ghash_p8
++.align 5
++.gcm_ghash_p8:
++ lis r0,0xfff8
++ li r8,0x10
++ mfspr $vrsave,256
++ li r9,0x20
++ mtspr 256,r0
++ li r10,0x30
++ lvx_u $Xl,0,$Xip # load Xi
++
++ lvx_u $Hl,r8,$Htbl # load pre-computed table
++ le?lvsl $lemask,r0,r0
++ lvx_u $H, r9,$Htbl
++ le?vspltisb $t0,0x07
++ lvx_u $Hh,r10,$Htbl
++ le?vxor $lemask,$lemask,$t0
++ lvx_u $xC2,0,$Htbl
++ le?vperm $Xl,$Xl,$Xl,$lemask
++ vxor $zero,$zero,$zero
++
++ lvx_u $IN,0,$inp
++ addi $inp,$inp,16
++ subi $len,$len,16
++ le?vperm $IN,$IN,$IN,$lemask
++ vxor $IN,$IN,$Xl
++ b Loop
++
++.align 5
++Loop:
++ subic $len,$len,16
++ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
++ subfe. r0,r0,r0 # borrow?-1:0
++ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
++ and r0,r0,$len
++ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
++ add $inp,$inp,r0
++
++ vpmsumd $t2,$Xl,$xC2 # 1st phase
++
++ vsldoi $t0,$Xm,$zero,8
++ vsldoi $t1,$zero,$Xm,8
++ vxor $Xl,$Xl,$t0
++ vxor $Xh,$Xh,$t1
++
++ vsldoi $Xl,$Xl,$Xl,8
++ vxor $Xl,$Xl,$t2
++ lvx_u $IN,0,$inp
++ addi $inp,$inp,16
++
++ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
++ vpmsumd $Xl,$Xl,$xC2
++ le?vperm $IN,$IN,$IN,$lemask
++ vxor $t1,$t1,$Xh
++ vxor $IN,$IN,$t1
++ vxor $IN,$IN,$Xl
++ beq Loop # did $len-=16 borrow?
++
++ vxor $Xl,$Xl,$t1
++ le?vperm $Xl,$Xl,$Xl,$lemask
++ stvx_u $Xl,0,$Xip # write out Xi
++
++ mtspr 256,$vrsave
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,4,0
++ .long 0
++.size .gcm_ghash_p8,.-.gcm_ghash_p8
++
++.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by "
++.align 2
++___
++
++foreach (split("\n",$code)) {
++ if ($flavour =~ /le$/o) { # little-endian
++ s/le\?//o or
++ s/be\?/#be#/o;
++ } else {
++ s/le\?/#le#/o or
++ s/be\?//o;
++ }
++ print $_,"\n";
++}
++
++close STDOUT; # enforce flush
+diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c
+index 0e6ff8b..6f8e7ee 100644
+--- a/crypto/modes/gcm128.c
++++ b/crypto/modes/gcm128.c
+@@ -671,6 +671,21 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
+ void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
+ void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+ # endif
++# elif defined(__sparc__) || defined(__sparc)
++# include "sparc_arch.h"
++# define GHASH_ASM_SPARC
++# define GCM_FUNCREF_4BIT
++extern unsigned int OPENSSL_sparcv9cap_P[];
++void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
++void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
++void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
++#elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
++# include "ppc_arch.h"
++# define GHASH_ASM_PPC
++# define GCM_FUNCREF_4BIT
++void gcm_init_p8(u128 Htable[16],const u64 Xi[2]);
++void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]);
++void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+ # endif
+ #endif
+
+@@ -747,6 +762,16 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
+ ctx->gmult = gcm_gmult_4bit;
+ ctx->ghash = gcm_ghash_4bit;
+ }
++# elif defined(GHASH_ASM_PPC)
++ if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
++ gcm_init_p8(ctx->Htable,ctx->H.u);
++ ctx->gmult = gcm_gmult_p8;
++ ctx->ghash = gcm_ghash_p8;
++ } else {
++ gcm_init_4bit(ctx->Htable,ctx->H.u);
++ ctx->gmult = gcm_gmult_4bit;
++ ctx->ghash = gcm_ghash_4bit;
++ }
+ # else
+ gcm_init_4bit(ctx->Htable,ctx->H.u);
+ # endif
+diff --git a/crypto/perlasm/ppc-xlate.pl b/crypto/perlasm/ppc-xlate.pl
+index a3edd98..f89e814 100755
+--- a/crypto/perlasm/ppc-xlate.pl
++++ b/crypto/perlasm/ppc-xlate.pl
+@@ -27,7 +27,8 @@ my $globl = sub {
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+- /linux.*32/ && do { $ret .= ".globl $name\n";
++ /linux.*(32|64le)/
++ && do { $ret .= ".globl $name\n";
+ $ret .= ".type $name,\@function";
+ last;
+ };
+@@ -37,7 +38,6 @@ my $globl = sub {
+ $ret .= ".align 3\n";
+ $ret .= "$name:\n";
+ $ret .= ".quad .$name,.TOC.\@tocbase,0\n";
+- $ret .= ".size $name,24\n";
+ $ret .= ".previous\n";
+
+ $name = ".$name";
+@@ -50,7 +50,9 @@ my $globl = sub {
+ $ret;
+ };
+ my $text = sub {
+- ($flavour =~ /aix/) ? ".csect" : ".text";
++ my $ret = ($flavour =~ /aix/) ?
".csect\t.text[PR],7" : ".text"; ++ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/); ++ $ret; + }; + my $machine = sub { + my $junk = shift; +@@ -62,9 +64,12 @@ my $machine = sub { + ".machine $arch"; + }; + my $size = sub { +- if ($flavour =~ /linux.*32/) ++ if ($flavour =~ /linux/) + { shift; +- ".size " . join(",",@_); ++ my $name = shift; $name =~ s|^[\.\_]||; ++ my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name; ++ $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/); ++ $ret; + } + else + { ""; } +@@ -77,6 +82,25 @@ my $asciz = sub { + else + { ""; } + }; ++my $quad = sub { ++ shift; ++ my @ret; ++ my ($hi,$lo); ++ for (@_) { ++ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) ++ { $hi=$1?"0x$1":"0"; $lo="0x$2"; } ++ elsif (/^([0-9]+)$/o) ++ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl ++ else ++ { $hi=undef; $lo=$_; } ++ ++ if (defined($hi)) ++ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } ++ else ++ { push(@ret,".quad $lo"); } ++ } ++ join("\n",@ret); ++}; + + ################################################################ + # simplified mnemonics not handled by at least one assembler +@@ -122,6 +146,46 @@ my $extrdi = sub { + $b = ($b+$n)&63; $n = 64-$n; + " rldicl $ra,$rs,$b,$n"; + }; ++my $vmr = sub { ++ my ($f,$vx,$vy) = @_; ++ " vor $vx,$vy,$vy"; ++}; ++ ++# PowerISA 2.06 stuff ++sub vsxmem_op { ++ my ($f, $vrt, $ra, $rb, $op) = @_; ++ " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); ++} ++# made-up unaligned memory reference AltiVec/VMX instructions ++my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x ++my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x ++my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx ++my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx ++my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x ++my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x ++ ++# PowerISA 2.07 stuff ++sub vcrypto_op { ++ my ($f, $vrt, $vra, $vrb, $op) = @_; ++ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; ++} ++my $vcipher = sub { vcrypto_op(@_, 1288); }; ++my $vcipherlast = sub { vcrypto_op(@_, 1289); }; ++my $vncipher = sub { vcrypto_op(@_, 1352); }; ++my $vncipherlast= sub { vcrypto_op(@_, 1353); }; ++my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; ++my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; ++my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; ++my $vpmsumb = sub { vcrypto_op(@_, 1032); }; ++my $vpmsumd = sub { vcrypto_op(@_, 1224); }; ++my $vpmsubh = sub { vcrypto_op(@_, 1096); }; ++my $vpmsumw = sub { vcrypto_op(@_, 1160); }; ++my $vaddudm = sub { vcrypto_op(@_, 192); }; ++ ++my $mtsle = sub { ++ my ($f, $arg) = @_; ++ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); ++}; + + while($line=<>) { + +@@ -138,7 +202,10 @@ while($line=<>) { + { + $line =~ s|(^[\.\w]+)\:\s*||; + my $label = $1; +- printf "%s:",($GLOBALS{$label} or $label) if ($label); ++ if ($label) { ++ printf "%s:",($GLOBALS{$label} or $label); ++ printf "\n.localentry\t$GLOBALS{$label},0" if ($GLOBALS{$label} && $flavour =~ /linux.*64le/); ++ } + } + + { +@@ -147,7 +214,7 @@ while($line=<>) { + my $mnemonic = $2; + my $f = $3; + my $opcode = eval("\$$mnemonic"); +- $line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/); ++ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." 
and $flavour !~ /osx/); + if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } + elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } + } +diff --git a/crypto/ppc_arch.h b/crypto/ppc_arch.h +new file mode 100644 +index 0000000..1192edf +--- /dev/null ++++ b/crypto/ppc_arch.h +@@ -0,0 +1,10 @@ ++#ifndef __PPC_ARCH_H__ ++#define __PPC_ARCH_H__ ++ ++extern unsigned int OPENSSL_ppccap_P; ++ ++#define PPC_FPU64 (1<<0) ++#define PPC_ALTIVEC (1<<1) ++#define PPC_CRYPTO207 (1<<2) ++ ++#endif +diff --git a/crypto/ppccap.c b/crypto/ppccap.c +index f71ba66..13c2ca5 100644 +--- a/crypto/ppccap.c ++++ b/crypto/ppccap.c +@@ -4,13 +4,15 @@ + #include + #include + #include ++#if defined(__linux) || defined(_AIX) ++#include ++#endif + #include + #include + +-#define PPC_FPU64 (1<<0) +-#define PPC_ALTIVEC (1<<1) ++#include "ppc_arch.h" + +-static int OPENSSL_ppccap_P = 0; ++unsigned int OPENSSL_ppccap_P = 0; + + static sigset_t all_masked; + +@@ -22,7 +24,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U + + if (sizeof(size_t)==4) + { +-#if (defined(__APPLE__) && defined(__MACH__)) ++#if 1 || (defined(__APPLE__) && defined(__MACH__)) + if (num>=8 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64)) + return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num); + #else +@@ -50,11 +52,28 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U + } + #endif + ++void sha256_block_p8(void *ctx,const void *inp,size_t len); ++void sha256_block_ppc(void *ctx,const void *inp,size_t len); ++void sha256_block_data_order(void *ctx,const void *inp,size_t len) ++ { ++ OPENSSL_ppccap_P&PPC_CRYPTO207? sha256_block_p8(ctx,inp,len): ++ sha256_block_ppc(ctx,inp,len); ++ } ++ ++void sha512_block_p8(void *ctx,const void *inp,size_t len); ++void sha512_block_ppc(void *ctx,const void *inp,size_t len); ++void sha512_block_data_order(void *ctx,const void *inp,size_t len) ++ { ++ OPENSSL_ppccap_P&PPC_CRYPTO207? 
sha512_block_p8(ctx,inp,len): ++ sha512_block_ppc(ctx,inp,len); ++ } ++ + static sigjmp_buf ill_jmp; + static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); } + + void OPENSSL_ppc64_probe(void); + void OPENSSL_altivec_probe(void); ++void OPENSSL_crypto207_probe(void); + + void OPENSSL_cpuid_setup(void) + { +@@ -85,12 +104,14 @@ void OPENSSL_cpuid_setup(void) + OPENSSL_ppccap_P = 0; + + #if defined(_AIX) +- if (sizeof(size_t)==4 ++ if (sizeof(size_t)==4) ++ { ++ struct utsname uts; + # if defined(_SC_AIX_KERNEL_BITMODE) +- && sysconf(_SC_AIX_KERNEL_BITMODE)!=64 ++ if (sysconf(_SC_AIX_KERNEL_BITMODE)!=64) return; + # endif +- ) +- return; ++ if (uname(&uts)!=0 || atoi(uts.version)<6) return; ++ } + #endif + + memset(&ill_act,0,sizeof(ill_act)); +@@ -102,6 +123,10 @@ void OPENSSL_cpuid_setup(void) + + if (sizeof(size_t)==4) + { ++#ifdef __linux ++ struct utsname uts; ++ if (uname(&uts)==0 && strcmp(uts.machine,"ppc64")==0) ++#endif + if (sigsetjmp(ill_jmp,1) == 0) + { + OPENSSL_ppc64_probe(); +@@ -119,6 +144,11 @@ void OPENSSL_cpuid_setup(void) + { + OPENSSL_altivec_probe(); + OPENSSL_ppccap_P |= PPC_ALTIVEC; ++ if (sigsetjmp(ill_jmp,1) == 0) ++ { ++ OPENSSL_crypto207_probe(); ++ OPENSSL_ppccap_P |= PPC_CRYPTO207; ++ } + } + + sigaction (SIGILL,&ill_oact,NULL); +diff --git a/crypto/ppccpuid.pl b/crypto/ppccpuid.pl +index 4ba736a..56cc851 100755 +--- a/crypto/ppccpuid.pl ++++ b/crypto/ppccpuid.pl +@@ -31,6 +31,7 @@ $code=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_ppc64_probe,.-.OPENSSL_ppc64_probe + + .globl .OPENSSL_altivec_probe + .align 4 +@@ -39,6 +40,17 @@ $code=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_altivec_probe,.-..OPENSSL_altivec_probe ++ ++.globl .OPENSSL_crypto207_probe ++.align 4 ++.OPENSSL_crypto207_probe: ++ lvx_u v0,0,r1 ++ vcipher v0,v0,v0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe + + .globl .OPENSSL_wipe_cpu + .align 4 +@@ -71,6 +83,7 @@ $code=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_wipe_cpu,.-.OPENSSL_wipe_cpu + + .globl .OPENSSL_atomic_add + .align 4 +@@ -84,6 +97,7 @@ Ladd: lwarx r5,0,r3 + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 ++.size .OPENSSL_atomic_add,.-.OPENSSL_atomic_add + + .globl .OPENSSL_rdtsc + .align 4 +@@ -93,6 +107,7 @@ Ladd: lwarx r5,0,r3 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .OPENSSL_rdtsc,.-.OPENSSL_rdtsc + + .globl .OPENSSL_cleanse + .align 4 +@@ -125,7 +140,99 @@ Laligned: + .long 0 + .byte 0,12,0x14,0,0,0,2,0 + .long 0 ++.size .OPENSSL_cleanse,.-.OPENSSL_cleanse ++___ ++{ ++my ($out,$cnt,$max)=("r3","r4","r5"); ++my ($tick,$lasttick)=("r6","r7"); ++my ($diff,$lastdiff)=("r8","r9"); ++ ++$code.=<<___; ++.globl .OPENSSL_instrument_bus ++.align 4 ++.OPENSSL_instrument_bus: ++ mtctr $cnt ++ ++ mftb $lasttick # collect 1st tick ++ li $diff,0 ++ ++ dcbf 0,$out # flush cache line ++ lwarx $tick,0,$out # load and lock ++ add $tick,$tick,$diff ++ stwcx. $tick,0,$out ++ stwx $tick,0,$out ++ ++Loop: mftb $tick ++ sub $diff,$tick,$lasttick ++ mr $lasttick,$tick ++ dcbf 0,$out # flush cache line ++ lwarx $tick,0,$out # load and lock ++ add $tick,$tick,$diff ++ stwcx. 
$tick,0,$out
++ stwx $tick,0,$out
++ addi $out,$out,4 # ++$out
++ bdnz Loop
++
++ mr r3,$cnt
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,2,0
++ .long 0
++.size .OPENSSL_instrument_bus,.-.OPENSSL_instrument_bus
++
++.globl .OPENSSL_instrument_bus2
++.align 4
++.OPENSSL_instrument_bus2:
++ mr r0,$cnt
++ slwi $cnt,$cnt,2
++
++ mftb $lasttick # collect 1st tick
++ li $diff,0
++
++ dcbf 0,$out # flush cache line
++ lwarx $tick,0,$out # load and lock
++ add $tick,$tick,$diff
++ stwcx. $tick,0,$out
++ stwx $tick,0,$out
++
++ mftb $tick # collect 1st diff
++ sub $diff,$tick,$lasttick
++ mr $lasttick,$tick
++ mr $lastdiff,$diff
++Loop2:
++ dcbf 0,$out # flush cache line
++ lwarx $tick,0,$out # load and lock
++ add $tick,$tick,$diff
++ stwcx. $tick,0,$out
++ stwx $tick,0,$out
++
++ addic. $max,$max,-1
++ beq Ldone2
++
++ mftb $tick
++ sub $diff,$tick,$lasttick
++ mr $lasttick,$tick
++ cmplw 7,$diff,$lastdiff
++ mr $lastdiff,$diff
++
++ mfcr $tick # pull cr
++ not $tick,$tick # flip bits
++ rlwinm $tick,$tick,1,29,29 # isolate flipped eq bit and scale
++
++ sub. $cnt,$cnt,$tick # conditional --$cnt
++ add $out,$out,$tick # conditional ++$out
++ bne Loop2
++
++Ldone2:
++ srwi $cnt,$cnt,2
++ sub r3,r0,$cnt
++ blr
++ .long 0
++ .byte 0,12,0x14,0,0,0,3,0
++ .long 0
++.size .OPENSSL_instrument_bus2,.-.OPENSSL_instrument_bus2
+ ___
++}
+
+ $code =~ s/\`([^\`]*)\`/eval $1/gem;
+ print $code;
+diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile
+index 6d191d3..58c6705 100644
+--- a/crypto/sha/Makefile
++++ b/crypto/sha/Makefile
+@@ -73,6 +73,8 @@ sha512-sparcv9.s:asm/sha512-sparcv9.pl; $(PERL) asm/sha512-sparcv9.pl $@ $(CFLAG
+ sha1-ppc.s: asm/sha1-ppc.pl; $(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@
+ sha256-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
+ sha512-ppc.s: asm/sha512-ppc.pl; $(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
++sha256p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@
++sha512p8-ppc.s: asm/sha512p8-ppc.pl; $(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@
+
+ sha1-parisc.s: asm/sha1-parisc.pl; $(PERL) asm/sha1-parisc.pl $(PERLASM_SCHEME) $@
+ sha256-parisc.s:asm/sha512-parisc.pl; $(PERL) asm/sha512-parisc.pl $(PERLASM_SCHEME) $@
+diff --git a/crypto/sha/asm/sha1-ppc.pl b/crypto/sha/asm/sha1-ppc.pl
+index 2140dd2..df59896 100755
+--- a/crypto/sha/asm/sha1-ppc.pl
++++ b/crypto/sha/asm/sha1-ppc.pl
+@@ -9,8 +9,7 @@
+
+ # I let hardware handle unaligned input(*), except on page boundaries
+ # (see below for details). Otherwise straightforward implementation
+-# with X vector in register bank. The module is big-endian [which is
+-# not big deal as there're no little-endian targets left around].
++# with X vector in register bank.
+ #
+ # (*) this means that this module is inappropriate for PPC403? Does
+ # anybody know if pre-POWER3 can sustain unaligned load?
+@@ -38,6 +37,10 @@ if ($flavour =~ /64/) {
+ $PUSH ="stw";
+ } else { die "nonsense $flavour"; }
+
++# Define endianness based on flavour
++# i.e.: linux64le
++$LITTLE_ENDIAN = ($flavour=~/le$/) ?
$SIZE_T : 0; ++ + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +@@ -68,14 +71,28 @@ $T ="r12"; + @X=("r16","r17","r18","r19","r20","r21","r22","r23", + "r24","r25","r26","r27","r28","r29","r30","r31"); + ++sub loadbe { ++my ($dst, $src, $temp_reg) = @_; ++$code.=<<___ if (!$LITTLE_ENDIAN); ++ lwz $dst,$src ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $temp_reg,$src ++ rotlwi $dst,$temp_reg,8 ++ rlwimi $dst,$temp_reg,24,0,7 ++ rlwimi $dst,$temp_reg,24,16,23 ++___ ++} ++ + sub BODY_00_19 { + my ($i,$a,$b,$c,$d,$e,$f)=@_; + my $j=$i+1; +-$code.=<<___ if ($i==0); +- lwz @X[$i],`$i*4`($inp) +-___ ++ ++ # Since the last value of $f is discarded, we can use ++ # it as a temp reg to swap byte-order when needed. ++ loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0); ++ loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15); + $code.=<<___ if ($i<15); +- lwz @X[$j],`$j*4`($inp) + add $f,$K,$e + rotlwi $e,$a,5 + add $f,$f,@X[$i] +@@ -108,31 +125,31 @@ my ($i,$a,$b,$c,$d,$e,$f)=@_; + my $j=$i+1; + $code.=<<___ if ($i<79); + add $f,$K,$e ++ xor $t0,$b,$d + rotlwi $e,$a,5 + xor @X[$j%16],@X[$j%16],@X[($j+2)%16] + add $f,$f,@X[$i%16] +- xor $t0,$b,$c ++ xor $t0,$t0,$c + xor @X[$j%16],@X[$j%16],@X[($j+8)%16] +- add $f,$f,$e ++ add $f,$f,$t0 + rotlwi $b,$b,30 +- xor $t0,$t0,$d + xor @X[$j%16],@X[$j%16],@X[($j+13)%16] +- add $f,$f,$t0 ++ add $f,$f,$e + rotlwi @X[$j%16],@X[$j%16],1 + ___ + $code.=<<___ if ($i==79); + add $f,$K,$e ++ xor $t0,$b,$d + rotlwi $e,$a,5 + lwz r16,0($ctx) + add $f,$f,@X[$i%16] +- xor $t0,$b,$c ++ xor $t0,$t0,$c + lwz r17,4($ctx) +- add $f,$f,$e ++ add $f,$f,$t0 + rotlwi $b,$b,30 + lwz r18,8($ctx) +- xor $t0,$t0,$d + lwz r19,12($ctx) +- add $f,$f,$t0 ++ add $f,$f,$e + lwz r20,16($ctx) + ___ + } +@@ -316,6 +333,7 @@ $code.=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size .sha1_block_data_order,.-.sha1_block_data_order + ___ + $code.=<<___; + .asciz "SHA1 block transform for PPC, CRYPTOGAMS by " +diff --git a/crypto/sha/asm/sha512-ppc.pl b/crypto/sha/asm/sha512-ppc.pl +index 6b44a68..734f3c1 100755 +--- a/crypto/sha/asm/sha512-ppc.pl ++++ b/crypto/sha/asm/sha512-ppc.pl +@@ -1,7 +1,7 @@ + #!/usr/bin/env perl + + # ==================================================================== +-# Written by Andy Polyakov for the OpenSSL ++# Written by Andy Polyakov for the OpenSSL + # project. The module is, however, dual licensed under OpenSSL and + # CRYPTOGAMS licenses depending on where you obtain it. For further + # details see http://www.openssl.org/~appro/cryptogams/. +@@ -9,8 +9,7 @@ + + # I let hardware handle unaligned input, except on page boundaries + # (see below for details). Otherwise straightforward implementation +-# with X vector in register bank. The module is big-endian [which is +-# not big deal as there're no little-endian targets left around]. ++# with X vector in register bank. + + # sha256 | sha512 + # -m64 -m32 | -m64 -m32 +@@ -56,6 +55,8 @@ if ($flavour =~ /64/) { + $PUSH="stw"; + } else { die "nonsense $flavour"; } + ++$LITTLE_ENDIAN = ($flavour=~/le$/) ? 
$SIZE_T : 0; ++ + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; + ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or + ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or +@@ -64,7 +65,7 @@ die "can't locate ppc-xlate.pl"; + open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; + + if ($output =~ /512/) { +- $func="sha512_block_data_order"; ++ $func="sha512_block_ppc"; + $SZ=8; + @Sigma0=(28,34,39); + @Sigma1=(14,18,41); +@@ -76,7 +77,7 @@ if ($output =~ /512/) { + $ROR="rotrdi"; + $SHR="srdi"; + } else { +- $func="sha256_block_data_order"; ++ $func="sha256_block_ppc"; + $SZ=4; + @Sigma0=( 2,13,22); + @Sigma1=( 6,11,25); +@@ -110,7 +111,7 @@ $B ="r9"; + $C ="r10"; + $D ="r11"; + $E ="r12"; +-$F ="r13"; $F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer ++$F =$t1; $t1 = "r0"; # stay away from "r13"; + $G ="r14"; + $H ="r15"; + +@@ -118,24 +119,23 @@ $H ="r15"; + @X=("r16","r17","r18","r19","r20","r21","r22","r23", + "r24","r25","r26","r27","r28","r29","r30","r31"); + +-$inp="r31"; # reassigned $inp! aliases with @X[15] ++$inp="r31" if($SZ==4 || $SIZE_T==8); # reassigned $inp! aliases with @X[15] + + sub ROUND_00_15 { + my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; + $code.=<<___; +- $LD $T,`$i*$SZ`($Tbl) + $ROR $a0,$e,$Sigma1[0] + $ROR $a1,$e,$Sigma1[1] + and $t0,$f,$e +- andc $t1,$g,$e +- add $T,$T,$h + xor $a0,$a0,$a1 ++ add $h,$h,$t1 ++ andc $t1,$g,$e + $ROR $a1,$a1,`$Sigma1[2]-$Sigma1[1]` + or $t0,$t0,$t1 ; Ch(e,f,g) +- add $T,$T,@X[$i] ++ add $h,$h,@X[$i%16] + xor $a0,$a0,$a1 ; Sigma1(e) +- add $T,$T,$t0 +- add $T,$T,$a0 ++ add $h,$h,$t0 ++ add $h,$h,$a0 + + $ROR $a0,$a,$Sigma0[0] + $ROR $a1,$a,$Sigma0[1] +@@ -146,9 +146,14 @@ $code.=<<___; + xor $t0,$t0,$t1 + and $t1,$b,$c + xor $a0,$a0,$a1 ; Sigma0(a) +- add $d,$d,$T ++ add $d,$d,$h + xor $t0,$t0,$t1 ; Maj(a,b,c) +- add $h,$T,$a0 ++___ ++$code.=<<___ if ($i<15); ++ $LD $t1,`($i+1)*$SZ`($Tbl) ++___ ++$code.=<<___; ++ add $h,$h,$a0 + add $h,$h,$t0 + + ___ +@@ -169,10 +174,11 @@ $code.=<<___; + add @X[$i],@X[$i],@X[($i+9)%16] + xor $a0,$a0,$a1 ; sigma0(X[(i+1)&0x0f]) + xor $t0,$t0,$t1 ; sigma1(X[(i+14)&0x0f]) ++ $LD $t1,`$i*$SZ`($Tbl) + add @X[$i],@X[$i],$a0 + add @X[$i],@X[$i],$t0 + ___ +-&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h); ++&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h); + } + + $code=<<___; +@@ -188,8 +194,6 @@ $func: + + $PUSH $ctx,`$FRAME-$SIZE_T*22`($sp) + +- $PUSH $toc,`$FRAME-$SIZE_T*20`($sp) +- $PUSH r13,`$FRAME-$SIZE_T*19`($sp) + $PUSH r14,`$FRAME-$SIZE_T*18`($sp) + $PUSH r15,`$FRAME-$SIZE_T*17`($sp) + $PUSH r16,`$FRAME-$SIZE_T*16`($sp) +@@ -209,7 +213,10 @@ $func: + $PUSH r30,`$FRAME-$SIZE_T*2`($sp) + $PUSH r31,`$FRAME-$SIZE_T*1`($sp) + $PUSH r0,`$FRAME+$LRSAVE`($sp) ++___ + ++if ($SZ==4 || $SIZE_T==8) { ++$code.=<<___; + $LD $A,`0*$SZ`($ctx) + mr $inp,r4 ; incarnate $inp + $LD $B,`1*$SZ`($ctx) +@@ -219,7 +226,16 @@ $func: + $LD $F,`5*$SZ`($ctx) + $LD $G,`6*$SZ`($ctx) + $LD $H,`7*$SZ`($ctx) ++___ ++} else { ++ for ($i=16;$i<32;$i++) { ++ $code.=<<___; ++ lwz r$i,`$LITTLE_ENDIAN^(4*($i-16))`($ctx) ++___ ++ } ++} + ++$code.=<<___; + bl LPICmeup + LPICedup: + andi. 
r0,$inp,3 +@@ -255,6 +271,9 @@ Lunaligned: + Lcross_page: + li $t1,`16*$SZ/4` + mtctr $t1 ++___ ++if ($SZ==4 || $SIZE_T==8) { ++$code.=<<___; + addi r20,$sp,$LOCALS ; aligned spot below the frame + Lmemcpy: + lbz r16,0($inp) +@@ -268,7 +287,26 @@ Lmemcpy: + stb r19,3(r20) + addi r20,r20,4 + bdnz Lmemcpy ++___ ++} else { ++$code.=<<___; ++ addi r12,$sp,$LOCALS ; aligned spot below the frame ++Lmemcpy: ++ lbz r8,0($inp) ++ lbz r9,1($inp) ++ lbz r10,2($inp) ++ lbz r11,3($inp) ++ addi $inp,$inp,4 ++ stb r8,0(r12) ++ stb r9,1(r12) ++ stb r10,2(r12) ++ stb r11,3(r12) ++ addi r12,r12,4 ++ bdnz Lmemcpy ++___ ++} + ++$code.=<<___; + $PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp + addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer + addi $inp,$sp,$LOCALS ; fictitious inp pointer +@@ -283,8 +321,6 @@ Lmemcpy: + + Ldone: + $POP r0,`$FRAME+$LRSAVE`($sp) +- $POP $toc,`$FRAME-$SIZE_T*20`($sp) +- $POP r13,`$FRAME-$SIZE_T*19`($sp) + $POP r14,`$FRAME-$SIZE_T*18`($sp) + $POP r15,`$FRAME-$SIZE_T*17`($sp) + $POP r16,`$FRAME-$SIZE_T*16`($sp) +@@ -309,27 +345,48 @@ Ldone: + .long 0 + .byte 0,12,4,1,0x80,18,3,0 + .long 0 ++___ + ++if ($SZ==4 || $SIZE_T==8) { ++$code.=<<___; + .align 4 + Lsha2_block_private: ++ $LD $t1,0($Tbl) + ___ + for($i=0;$i<16;$i++) { +-$code.=<<___ if ($SZ==4); ++$code.=<<___ if ($SZ==4 && !$LITTLE_ENDIAN); + lwz @X[$i],`$i*$SZ`($inp) + ___ ++$code.=<<___ if ($SZ==4 && $LITTLE_ENDIAN); ++ lwz $a0,`$i*$SZ`($inp) ++ rotlwi @X[$i],$a0,8 ++ rlwimi @X[$i],$a0,24,0,7 ++ rlwimi @X[$i],$a0,24,16,23 ++___ + # 64-bit loads are split to 2x32-bit ones, as CPU can't handle + # unaligned 64-bit loads, only 32-bit ones... +-$code.=<<___ if ($SZ==8); ++$code.=<<___ if ($SZ==8 && !$LITTLE_ENDIAN); + lwz $t0,`$i*$SZ`($inp) + lwz @X[$i],`$i*$SZ+4`($inp) + insrdi @X[$i],$t0,32,0 + ___ ++$code.=<<___ if ($SZ==8 && $LITTLE_ENDIAN); ++ lwz $a0,`$i*$SZ`($inp) ++ lwz $a1,`$i*$SZ+4`($inp) ++ rotlwi $t0,$a0,8 ++ rotlwi @X[$i],$a1,8 ++ rlwimi $t0,$a0,24,0,7 ++ rlwimi @X[$i],$a1,24,0,7 ++ rlwimi $t0,$a0,24,16,23 ++ rlwimi @X[$i],$a1,24,16,23 ++ insrdi @X[$i],$t0,32,0 ++___ + &ROUND_00_15($i,@V); + unshift(@V,pop(@V)); + } + $code.=<<___; +- li $T,`$rounds/16-1` +- mtctr $T ++ li $t0,`$rounds/16-1` ++ mtctr $t0 + .align 4 + Lrounds: + addi $Tbl,$Tbl,`16*$SZ` +@@ -377,7 +434,282 @@ $code.=<<___; + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 ++.size $func,.-$func ++___ ++} else { ++######################################################################## ++# SHA512 for PPC32, X vector is off-loaded to stack... 
++# ++# | sha512 ++# | -m32 ++# ----------------------+----------------------- ++# PPC74x0,gcc-4.0.1 | +48% ++# POWER6,gcc-4.4.6 | +124%(*) ++# POWER7,gcc-4.4.6 | +79%(*) ++# e300,gcc-4.1.0 | +167% ++# ++# (*) ~1/3 of -m64 result [and ~20% better than -m32 code generated ++# by xlc-12.1] ++ ++my $XOFF=$LOCALS; ++ ++my @V=map("r$_",(16..31)); # A..H ++ ++my ($s0,$s1,$t0,$t1,$t2,$t3,$a0,$a1,$a2,$a3)=map("r$_",(0,5,6,8..12,14,15)); ++my ($x0,$x1)=("r3","r4"); # zaps $ctx and $inp ++ ++sub ROUND_00_15_ppc32 { ++my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, ++ $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_; ++ ++$code.=<<___; ++ lwz $t2,`$SZ*($i%16)+($LITTLE_ENDIAN^4)`($Tbl) ++ xor $a0,$flo,$glo ++ lwz $t3,`$SZ*($i%16)+($LITTLE_ENDIAN^0)`($Tbl) ++ xor $a1,$fhi,$ghi ++ addc $hlo,$hlo,$t0 ; h+=x[i] ++ stw $t0,`$XOFF+0+$SZ*($i%16)`($sp) ; save x[i] ++ ++ srwi $s0,$elo,$Sigma1[0] ++ srwi $s1,$ehi,$Sigma1[0] ++ and $a0,$a0,$elo ++ adde $hhi,$hhi,$t1 ++ and $a1,$a1,$ehi ++ stw $t1,`$XOFF+4+$SZ*($i%16)`($sp) ++ srwi $t0,$elo,$Sigma1[1] ++ srwi $t1,$ehi,$Sigma1[1] ++ addc $hlo,$hlo,$t2 ; h+=K512[i] ++ insrwi $s0,$ehi,$Sigma1[0],0 ++ insrwi $s1,$elo,$Sigma1[0],0 ++ xor $a0,$a0,$glo ; Ch(e,f,g) ++ adde $hhi,$hhi,$t3 ++ xor $a1,$a1,$ghi ++ insrwi $t0,$ehi,$Sigma1[1],0 ++ insrwi $t1,$elo,$Sigma1[1],0 ++ addc $hlo,$hlo,$a0 ; h+=Ch(e,f,g) ++ srwi $t2,$ehi,$Sigma1[2]-32 ++ srwi $t3,$elo,$Sigma1[2]-32 ++ xor $s0,$s0,$t0 ++ xor $s1,$s1,$t1 ++ insrwi $t2,$elo,$Sigma1[2]-32,0 ++ insrwi $t3,$ehi,$Sigma1[2]-32,0 ++ xor $a0,$alo,$blo ; a^b, b^c in next round ++ adde $hhi,$hhi,$a1 ++ xor $a1,$ahi,$bhi ++ xor $s0,$s0,$t2 ; Sigma1(e) ++ xor $s1,$s1,$t3 ++ ++ srwi $t0,$alo,$Sigma0[0] ++ and $a2,$a2,$a0 ++ addc $hlo,$hlo,$s0 ; h+=Sigma1(e) ++ and $a3,$a3,$a1 ++ srwi $t1,$ahi,$Sigma0[0] ++ srwi $s0,$ahi,$Sigma0[1]-32 ++ adde $hhi,$hhi,$s1 ++ srwi $s1,$alo,$Sigma0[1]-32 ++ insrwi $t0,$ahi,$Sigma0[0],0 ++ insrwi $t1,$alo,$Sigma0[0],0 ++ xor $a2,$a2,$blo ; Maj(a,b,c) ++ addc $dlo,$dlo,$hlo ; d+=h ++ xor $a3,$a3,$bhi ++ insrwi $s0,$alo,$Sigma0[1]-32,0 ++ insrwi $s1,$ahi,$Sigma0[1]-32,0 ++ adde $dhi,$dhi,$hhi ++ srwi $t2,$ahi,$Sigma0[2]-32 ++ srwi $t3,$alo,$Sigma0[2]-32 ++ xor $s0,$s0,$t0 ++ addc $hlo,$hlo,$a2 ; h+=Maj(a,b,c) ++ xor $s1,$s1,$t1 ++ insrwi $t2,$alo,$Sigma0[2]-32,0 ++ insrwi $t3,$ahi,$Sigma0[2]-32,0 ++ adde $hhi,$hhi,$a3 ++___ ++$code.=<<___ if ($i>=15); ++ lwz $t0,`$XOFF+0+$SZ*(($i+2)%16)`($sp) ++ lwz $t1,`$XOFF+4+$SZ*(($i+2)%16)`($sp) ++___ ++$code.=<<___ if ($i<15 && !$LITTLE_ENDIAN); ++ lwz $t1,`$SZ*($i+1)+0`($inp) ++ lwz $t0,`$SZ*($i+1)+4`($inp) + ___ ++$code.=<<___ if ($i<15 && $LITTLE_ENDIAN); ++ lwz $a2,`$SZ*($i+1)+0`($inp) ++ lwz $a3,`$SZ*($i+1)+4`($inp) ++ rotlwi $t1,$a2,8 ++ rotlwi $t0,$a3,8 ++ rlwimi $t1,$a2,24,0,7 ++ rlwimi $t0,$a3,24,0,7 ++ rlwimi $t1,$a2,24,16,23 ++ rlwimi $t0,$a3,24,16,23 ++___ ++$code.=<<___; ++ xor $s0,$s0,$t2 ; Sigma0(a) ++ xor $s1,$s1,$t3 ++ addc $hlo,$hlo,$s0 ; h+=Sigma0(a) ++ adde $hhi,$hhi,$s1 ++___ ++$code.=<<___ if ($i==15); ++ lwz $x0,`$XOFF+0+$SZ*(($i+1)%16)`($sp) ++ lwz $x1,`$XOFF+4+$SZ*(($i+1)%16)`($sp) ++___ ++} ++sub ROUND_16_xx_ppc32 { ++my ($i, $ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo, ++ $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_; ++ ++$code.=<<___; ++ srwi $s0,$t0,$sigma0[0] ++ srwi $s1,$t1,$sigma0[0] ++ srwi $t2,$t0,$sigma0[1] ++ srwi $t3,$t1,$sigma0[1] ++ insrwi $s0,$t1,$sigma0[0],0 ++ insrwi $s1,$t0,$sigma0[0],0 ++ srwi $a0,$t0,$sigma0[2] ++ insrwi $t2,$t1,$sigma0[1],0 ++ insrwi $t3,$t0,$sigma0[1],0 ++ insrwi $a0,$t1,$sigma0[2],0 ++ xor $s0,$s0,$t2 ++ lwz 
$t2,`$XOFF+0+$SZ*(($i+14)%16)`($sp) ++ srwi $a1,$t1,$sigma0[2] ++ xor $s1,$s1,$t3 ++ lwz $t3,`$XOFF+4+$SZ*(($i+14)%16)`($sp) ++ xor $a0,$a0,$s0 ++ srwi $s0,$t2,$sigma1[0] ++ xor $a1,$a1,$s1 ++ srwi $s1,$t3,$sigma1[0] ++ addc $x0,$x0,$a0 ; x[i]+=sigma0(x[i+1]) ++ srwi $a0,$t3,$sigma1[1]-32 ++ insrwi $s0,$t3,$sigma1[0],0 ++ insrwi $s1,$t2,$sigma1[0],0 ++ adde $x1,$x1,$a1 ++ srwi $a1,$t2,$sigma1[1]-32 ++ ++ insrwi $a0,$t2,$sigma1[1]-32,0 ++ srwi $t2,$t2,$sigma1[2] ++ insrwi $a1,$t3,$sigma1[1]-32,0 ++ insrwi $t2,$t3,$sigma1[2],0 ++ xor $s0,$s0,$a0 ++ lwz $a0,`$XOFF+0+$SZ*(($i+9)%16)`($sp) ++ srwi $t3,$t3,$sigma1[2] ++ xor $s1,$s1,$a1 ++ lwz $a1,`$XOFF+4+$SZ*(($i+9)%16)`($sp) ++ xor $s0,$s0,$t2 ++ addc $x0,$x0,$a0 ; x[i]+=x[i+9] ++ xor $s1,$s1,$t3 ++ adde $x1,$x1,$a1 ++ addc $x0,$x0,$s0 ; x[i]+=sigma1(x[i+14]) ++ adde $x1,$x1,$s1 ++___ ++ ($t0,$t1,$x0,$x1) = ($x0,$x1,$t0,$t1); ++ &ROUND_00_15_ppc32(@_); ++} ++ ++$code.=<<___; ++.align 4 ++Lsha2_block_private: ++___ ++$code.=<<___ if (!$LITTLE_ENDIAN); ++ lwz $t1,0($inp) ++ xor $a2,@V[3],@V[5] ; B^C, magic seed ++ lwz $t0,4($inp) ++ xor $a3,@V[2],@V[4] ++___ ++$code.=<<___ if ($LITTLE_ENDIAN); ++ lwz $a1,0($inp) ++ xor $a2,@V[3],@V[5] ; B^C, magic seed ++ lwz $a0,4($inp) ++ xor $a3,@V[2],@V[4] ++ rotlwi $t1,$a1,8 ++ rotlwi $t0,$a0,8 ++ rlwimi $t1,$a1,24,0,7 ++ rlwimi $t0,$a0,24,0,7 ++ rlwimi $t1,$a1,24,16,23 ++ rlwimi $t0,$a0,24,16,23 ++___ ++for($i=0;$i<16;$i++) { ++ &ROUND_00_15_ppc32($i,@V); ++ unshift(@V,pop(@V)); unshift(@V,pop(@V)); ++ ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1); ++} ++$code.=<<___; ++ li $a0,`$rounds/16-1` ++ mtctr $a0 ++.align 4 ++Lrounds: ++ addi $Tbl,$Tbl,`16*$SZ` ++___ ++for(;$i<32;$i++) { ++ &ROUND_16_xx_ppc32($i,@V); ++ unshift(@V,pop(@V)); unshift(@V,pop(@V)); ++ ($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1); ++} ++$code.=<<___; ++ bdnz- Lrounds ++ ++ $POP $ctx,`$FRAME-$SIZE_T*22`($sp) ++ $POP $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer ++ $POP $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer ++ subi $Tbl,$Tbl,`($rounds-16)*$SZ` ; rewind Tbl ++ ++ lwz $t0,`$LITTLE_ENDIAN^0`($ctx) ++ lwz $t1,`$LITTLE_ENDIAN^4`($ctx) ++ lwz $t2,`$LITTLE_ENDIAN^8`($ctx) ++ lwz $t3,`$LITTLE_ENDIAN^12`($ctx) ++ lwz $a0,`$LITTLE_ENDIAN^16`($ctx) ++ lwz $a1,`$LITTLE_ENDIAN^20`($ctx) ++ lwz $a2,`$LITTLE_ENDIAN^24`($ctx) ++ addc @V[1],@V[1],$t1 ++ lwz $a3,`$LITTLE_ENDIAN^28`($ctx) ++ adde @V[0],@V[0],$t0 ++ lwz $t0,`$LITTLE_ENDIAN^32`($ctx) ++ addc @V[3],@V[3],$t3 ++ lwz $t1,`$LITTLE_ENDIAN^36`($ctx) ++ adde @V[2],@V[2],$t2 ++ lwz $t2,`$LITTLE_ENDIAN^40`($ctx) ++ addc @V[5],@V[5],$a1 ++ lwz $t3,`$LITTLE_ENDIAN^44`($ctx) ++ adde @V[4],@V[4],$a0 ++ lwz $a0,`$LITTLE_ENDIAN^48`($ctx) ++ addc @V[7],@V[7],$a3 ++ lwz $a1,`$LITTLE_ENDIAN^52`($ctx) ++ adde @V[6],@V[6],$a2 ++ lwz $a2,`$LITTLE_ENDIAN^56`($ctx) ++ addc @V[9],@V[9],$t1 ++ lwz $a3,`$LITTLE_ENDIAN^60`($ctx) ++ adde @V[8],@V[8],$t0 ++ stw @V[0],`$LITTLE_ENDIAN^0`($ctx) ++ stw @V[1],`$LITTLE_ENDIAN^4`($ctx) ++ addc @V[11],@V[11],$t3 ++ stw @V[2],`$LITTLE_ENDIAN^8`($ctx) ++ stw @V[3],`$LITTLE_ENDIAN^12`($ctx) ++ adde @V[10],@V[10],$t2 ++ stw @V[4],`$LITTLE_ENDIAN^16`($ctx) ++ stw @V[5],`$LITTLE_ENDIAN^20`($ctx) ++ addc @V[13],@V[13],$a1 ++ stw @V[6],`$LITTLE_ENDIAN^24`($ctx) ++ stw @V[7],`$LITTLE_ENDIAN^28`($ctx) ++ adde @V[12],@V[12],$a0 ++ stw @V[8],`$LITTLE_ENDIAN^32`($ctx) ++ stw @V[9],`$LITTLE_ENDIAN^36`($ctx) ++ addc @V[15],@V[15],$a3 ++ stw @V[10],`$LITTLE_ENDIAN^40`($ctx) ++ stw @V[11],`$LITTLE_ENDIAN^44`($ctx) ++ adde @V[14],@V[14],$a2 ++ stw @V[12],`$LITTLE_ENDIAN^48`($ctx) ++ stw 
@V[13],`$LITTLE_ENDIAN^52`($ctx) ++ stw @V[14],`$LITTLE_ENDIAN^56`($ctx) ++ stw @V[15],`$LITTLE_ENDIAN^60`($ctx) ++ ++ addi $inp,$inp,`16*$SZ` ; advance inp ++ $PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ++ $UCMP $inp,$num ++ bne Lsha2_block_private ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++.size $func,.-$func ++___ ++} + + # Ugly hack here, because PPC assembler syntax seem to vary too + # much from platforms to platform... +@@ -395,46 +727,46 @@ LPICmeup: + .space `64-9*4` + ___ + $code.=<<___ if ($SZ==8); +- .long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd +- .long 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc +- .long 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019 +- .long 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118 +- .long 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe +- .long 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2 +- .long 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1 +- .long 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694 +- .long 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3 +- .long 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65 +- .long 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483 +- .long 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5 +- .long 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210 +- .long 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4 +- .long 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725 +- .long 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70 +- .long 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926 +- .long 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df +- .long 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8 +- .long 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b +- .long 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001 +- .long 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30 +- .long 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910 +- .long 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8 +- .long 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53 +- .long 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8 +- .long 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb +- .long 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3 +- .long 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60 +- .long 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec +- .long 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9 +- .long 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b +- .long 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207 +- .long 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178 +- .long 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6 +- .long 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b +- .long 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493 +- .long 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c +- .long 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a +- .long 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817 ++ .quad 0x428a2f98d728ae22,0x7137449123ef65cd ++ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc ++ .quad 0x3956c25bf348b538,0x59f111f1b605d019 ++ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 ++ .quad 0xd807aa98a3030242,0x12835b0145706fbe ++ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 ++ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 ++ .quad 0x9bdc06a725c71235,0xc19bf174cf692694 ++ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 ++ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 ++ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 ++ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 ++ .quad 0x983e5152ee66dfab,0xa831c66d2db43210 ++ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 ++ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 ++ .quad 0x06ca6351e003826f,0x142929670a0e6e70 ++ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 ++ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df ++ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 ++ .quad 
0x81c2c92e47edaee6,0x92722c851482353b
++ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
++ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
++ .quad 0xd192e819d6ef5218,0xd69906245565a910
++ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
++ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
++ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
++ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
++ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
++ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
++ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
++ .quad 0x90befffa23631e28,0xa4506cebde82bde9
++ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
++ .quad 0xca273eceea26619c,0xd186b8c721c0c207
++ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
++ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
++ .quad 0x113f9804bef90dae,0x1b710b35131c471b
++ .quad 0x28db77f523047d84,0x32caab7b40c72493
++ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
++ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
++ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+ ___
+ $code.=<<___ if ($SZ==4);
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl
+new file mode 100755
+index 0000000..a316b31
+--- /dev/null
++++ b/crypto/sha/asm/sha512p8-ppc.pl
+@@ -0,0 +1,423 @@
++#!/usr/bin/env perl
++
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++
++# SHA256/512 for PowerISA v2.07.
++#
++# Accurate performance measurements are problematic, because it's
++# always virtualized setup with possibly throttled processor.
++# Relative comparison is therefore more informative. This module is
++# ~60% faster than integer-only sha512-ppc.pl. To anchor to something
++# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than
++# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than
++# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
++# result is degree of computational resources' utilization. POWER8 is
++# "massively multi-threaded chip" and difference between single- and
++# maximum multi-process benchmark results tells that utlization is
++# whooping 94%. For sha512-ppc.pl we get [not unimpressive] 84% and
++# for sha1-ppc.pl - 73%. 100% means that multi-process result equals
++# to single-process one, given that all threads end up on the same
++# physical core.
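The vector module that follows does a full SHA-2 round in a handful of instructions: vshasigmaw/vshasigmad evaluate the big and small sigma functions in one operation, and vsel evaluates Ch(e,f,g) and Maj(a,b,c). As a reference point, here is a scalar C rendering of the SHA-256 variants of those primitives, with rotation counts per FIPS 180-4 matching the @Sigma0/@Sigma1/@sigma0/@sigma1 tables in sha512-ppc.pl; this is an editorial illustration, not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Rotate right, the building block of all SHA-2 sigma functions. */
    static uint32_t rotr32(uint32_t x, unsigned n) { return (x >> n) | (x << (32 - n)); }

    /* "Big" sigmas, applied to the working variables a and e. */
    static uint32_t Sigma0(uint32_t x) { return rotr32(x,2) ^ rotr32(x,13) ^ rotr32(x,22); }
    static uint32_t Sigma1(uint32_t x) { return rotr32(x,6) ^ rotr32(x,11) ^ rotr32(x,25); }
    /* "Small" sigmas, applied during message-schedule expansion. */
    static uint32_t sigma0(uint32_t x) { return rotr32(x,7) ^ rotr32(x,18) ^ (x >> 3); }
    static uint32_t sigma1(uint32_t x) { return rotr32(x,17) ^ rotr32(x,19) ^ (x >> 10); }
    /* The two bitwise selects that vsel computes in one instruction each. */
    static uint32_t Ch(uint32_t e, uint32_t f, uint32_t g)  { return (e & f) ^ (~e & g); }
    static uint32_t Maj(uint32_t a, uint32_t b, uint32_t c) { return (a & b) ^ (a & c) ^ (b & c); }

    int main(void)
    {
        /* Spot check: prints 40080400, 00000002, 00000001. */
        printf("Sigma0(1)=%08x Ch(0,1,2)=%08x Maj(1,1,0)=%08x\n",
               Sigma0(1), Ch(0,1,2), Maj(1,1,0));
        (void)Sigma1; (void)sigma0; (void)sigma1;
        return 0;
    }

The SHA-512 variants differ only in operating on 64-bit words with their own rotation counts, which is why the perlasm emits the same round body parameterized by $sz.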
++ ++$flavour=shift; ++$output =shift; ++ ++if ($flavour =~ /64/) { ++ $SIZE_T=8; ++ $LRSAVE=2*$SIZE_T; ++ $STU="stdu"; ++ $POP="ld"; ++ $PUSH="std"; ++} elsif ($flavour =~ /32/) { ++ $SIZE_T=4; ++ $LRSAVE=$SIZE_T; ++ $STU="stwu"; ++ $POP="lwz"; ++ $PUSH="stw"; ++} else { die "nonsense $flavour"; } ++ ++$LENDIAN=($flavour=~/le/); ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or ++die "can't locate ppc-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; ++ ++if ($output =~ /512/) { ++ $bits=512; ++ $SZ=8; ++ $sz="d"; ++ $rounds=80; ++} else { ++ $bits=256; ++ $SZ=4; ++ $sz="w"; ++ $rounds=64; ++} ++ ++$func="sha${bits}_block_p8"; ++$FRAME=8*$SIZE_T; ++ ++$sp ="r1"; ++$toc="r2"; ++$ctx="r3"; ++$inp="r4"; ++$num="r5"; ++$Tbl="r6"; ++$idx="r7"; ++$lrsave="r8"; ++$offload="r11"; ++$vrsave="r12"; ++($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31)); ++ ++@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7)); ++@X=map("v$_",(8..23)); ++($Ki,$Func,$S0,$S1,$s0,$s1,$lemask)=map("v$_",(24..31)); ++ ++sub ROUND { ++my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_; ++my $j=($i+1)%16; ++ ++$code.=<<___ if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1)); ++ lvx_u @X[$i+1],0,$inp ; load X[i] in advance ++ addi $inp,$inp,16 ++___ ++$code.=<<___ if ($i<16 && ($i%(16/$SZ))); ++ vsldoi @X[$i],@X[$i-1],@X[$i-1],$SZ ++___ ++$code.=<<___ if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0); ++ vperm @X[$i],@X[$i],@X[$i],$lemask ++___ ++$code.=<<___; ++ `"vshasigma${sz} $s0,@X[($j+1)%16],0,0" if ($i>=15)` ++ vsel $Func,$g,$f,$e ; Ch(e,f,g) ++ vshasigma${sz} $S1,$e,1,15 ; Sigma1(e) ++ vaddu${sz}m $h,$h,@X[$i%16] ; h+=X[i] ++ vshasigma${sz} $S0,$a,1,0 ; Sigma0(a) ++ `"vshasigma${sz} $s1,@X[($j+14)%16],0,15" if ($i>=15)` ++ vaddu${sz}m $h,$h,$Func ; h+=Ch(e,f,g) ++ vxor $Func,$a,$b ++ `"vaddu${sz}m @X[$j],@X[$j],@X[($j+9)%16]" if ($i>=15)` ++ vaddu${sz}m $h,$h,$S1 ; h+=Sigma1(e) ++ vsel $Func,$b,$c,$Func ; Maj(a,b,c) ++ vaddu${sz}m $g,$g,$Ki ; future h+=K[i] ++ vaddu${sz}m $d,$d,$h ; d+=h ++ vaddu${sz}m $S0,$S0,$Func ; Sigma0(a)+Maj(a,b,c) ++ `"vaddu${sz}m @X[$j],@X[$j],$s0" if ($i>=15)` ++ lvx $Ki,$idx,$Tbl ; load next K[i] ++ addi $idx,$idx,16 ++ vaddu${sz}m $h,$h,$S0 ; h+=Sigma0(a)+Maj(a,b,c) ++ `"vaddu${sz}m @X[$j],@X[$j],$s1" if ($i>=15)` ++___ ++} ++ ++$code=<<___; ++.machine "any" ++.text ++ ++.globl $func ++.align 6 ++$func: ++ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) ++ mflr $lrsave ++ li r10,`$FRAME+8*16+15` ++ li r11,`$FRAME+8*16+31` ++ stvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ mfspr $vrsave,256 ++ stvx v21,r11,$sp ++ addi r11,r11,32 ++ stvx v22,r10,$sp ++ addi r10,r10,32 ++ stvx v23,r11,$sp ++ addi r11,r11,32 ++ stvx v24,r10,$sp ++ addi r10,r10,32 ++ stvx v25,r11,$sp ++ addi r11,r11,32 ++ stvx v26,r10,$sp ++ addi r10,r10,32 ++ stvx v27,r11,$sp ++ addi r11,r11,32 ++ stvx v28,r10,$sp ++ addi r10,r10,32 ++ stvx v29,r11,$sp ++ addi r11,r11,32 ++ stvx v30,r10,$sp ++ stvx v31,r11,$sp ++ li r11,-1 ++ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave ++ li $x10,0x10 ++ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ li $x20,0x20 ++ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ li $x30,0x30 ++ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ li $x40,0x40 ++ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ li $x50,0x50 ++ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ li $x60,0x60 ++ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ li $x70,0x70 ++ $PUSH $lrsave,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) ++ mtspr 
256,r11 ++ ++ bl LPICmeup ++ addi $offload,$sp,$FRAME+15 ++___ ++$code.=<<___ if ($LENDIAN); ++ li $idx,8 ++ lvsl $lemask,0,$idx ++ vspltisb $Ki,0x0f ++ vxor $lemask,$lemask,$Ki ++___ ++$code.=<<___ if ($SZ==4); ++ lvx_4w $A,$x00,$ctx ++ lvx_4w $E,$x10,$ctx ++ vsldoi $B,$A,$A,4 # unpack ++ vsldoi $C,$A,$A,8 ++ vsldoi $D,$A,$A,12 ++ vsldoi $F,$E,$E,4 ++ vsldoi $G,$E,$E,8 ++ vsldoi $H,$E,$E,12 ++___ ++$code.=<<___ if ($SZ==8); ++ lvx_u $A,$x00,$ctx ++ lvx_u $C,$x10,$ctx ++ lvx_u $E,$x20,$ctx ++ vsldoi $B,$A,$A,8 # unpack ++ lvx_u $G,$x30,$ctx ++ vsldoi $D,$C,$C,8 ++ vsldoi $F,$E,$E,8 ++ vsldoi $H,$G,$G,8 ++___ ++$code.=<<___; ++ li r0,`($rounds-16)/16` # inner loop counter ++ b Loop ++.align 5 ++Loop: ++ lvx $Ki,$x00,$Tbl ++ li $idx,16 ++ lvx_u @X[0],0,$inp ++ addi $inp,$inp,16 ++ stvx $A,$x00,$offload # offload $A-$H ++ stvx $B,$x10,$offload ++ stvx $C,$x20,$offload ++ stvx $D,$x30,$offload ++ stvx $E,$x40,$offload ++ stvx $F,$x50,$offload ++ stvx $G,$x60,$offload ++ stvx $H,$x70,$offload ++ vaddu${sz}m $H,$H,$Ki # h+K[i] ++ lvx $Ki,$idx,$Tbl ++ addi $idx,$idx,16 ++___ ++for ($i=0;$i<16;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } ++$code.=<<___; ++ mtctr r0 ++ b L16_xx ++.align 5 ++L16_xx: ++___ ++for (;$i<32;$i++) { &ROUND($i,@V); unshift(@V,pop(@V)); } ++$code.=<<___; ++ bdnz L16_xx ++ ++ lvx @X[2],$x00,$offload ++ subic. $num,$num,1 ++ lvx @X[3],$x10,$offload ++ vaddu${sz}m $A,$A,@X[2] ++ lvx @X[4],$x20,$offload ++ vaddu${sz}m $B,$B,@X[3] ++ lvx @X[5],$x30,$offload ++ vaddu${sz}m $C,$C,@X[4] ++ lvx @X[6],$x40,$offload ++ vaddu${sz}m $D,$D,@X[5] ++ lvx @X[7],$x50,$offload ++ vaddu${sz}m $E,$E,@X[6] ++ lvx @X[8],$x60,$offload ++ vaddu${sz}m $F,$F,@X[7] ++ lvx @X[9],$x70,$offload ++ vaddu${sz}m $G,$G,@X[8] ++ vaddu${sz}m $H,$H,@X[9] ++ bne Loop ++___ ++$code.=<<___ if ($SZ==4); ++ lvx @X[0],$idx,$Tbl ++ addi $idx,$idx,16 ++ vperm $A,$A,$B,$Ki # pack the answer ++ lvx @X[1],$idx,$Tbl ++ vperm $E,$E,$F,$Ki ++ vperm $A,$A,$C,@X[0] ++ vperm $E,$E,$G,@X[0] ++ vperm $A,$A,$D,@X[1] ++ vperm $E,$E,$H,@X[1] ++ stvx_4w $A,$x00,$ctx ++ stvx_4w $E,$x10,$ctx ++___ ++$code.=<<___ if ($SZ==8); ++ vperm $A,$A,$B,$Ki # pack the answer ++ vperm $C,$C,$D,$Ki ++ vperm $E,$E,$F,$Ki ++ vperm $G,$G,$H,$Ki ++ stvx_u $A,$x00,$ctx ++ stvx_u $C,$x10,$ctx ++ stvx_u $E,$x20,$ctx ++ stvx_u $G,$x30,$ctx ++___ ++$code.=<<___; ++ li r10,`$FRAME+8*16+15` ++ mtlr $lrsave ++ li r11,`$FRAME+8*16+31` ++ mtspr 256,$vrsave ++ lvx v20,r10,$sp # ABI says so ++ addi r10,r10,32 ++ lvx v21,r11,$sp ++ addi r11,r11,32 ++ lvx v22,r10,$sp ++ addi r10,r10,32 ++ lvx v23,r11,$sp ++ addi r11,r11,32 ++ lvx v24,r10,$sp ++ addi r10,r10,32 ++ lvx v25,r11,$sp ++ addi r11,r11,32 ++ lvx v26,r10,$sp ++ addi r10,r10,32 ++ lvx v27,r11,$sp ++ addi r11,r11,32 ++ lvx v28,r10,$sp ++ addi r10,r10,32 ++ lvx v29,r11,$sp ++ addi r11,r11,32 ++ lvx v30,r10,$sp ++ lvx v31,r11,$sp ++ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) ++ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) ++ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) ++ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) ++ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) ++ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) ++ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` ++ blr ++ .long 0 ++ .byte 0,12,4,1,0x80,6,3,0 ++ .long 0 ++.size $func,.-$func ++___ ++ ++# Ugly hack here, because PPC assembler syntax seem to vary too ++# much from platforms to platform... ++$code.=<<___; ++.align 6 ++LPICmeup: ++ mflr r0 ++ bcl 20,31,\$+4 ++ mflr $Tbl ; vvvvvv "distance" between . 
and 1st data entry ++ addi $Tbl,$Tbl,`64-8` ++ mtlr r0 ++ blr ++ .long 0 ++ .byte 0,12,0x14,0,0,0,0,0 ++ .space `64-9*4` ++___ ++ ++if ($SZ==8) { ++ local *table = sub { ++ foreach(@_) { $code.=".quad $_,$_\n"; } ++ }; ++ table( ++ "0x428a2f98d728ae22","0x7137449123ef65cd", ++ "0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc", ++ "0x3956c25bf348b538","0x59f111f1b605d019", ++ "0x923f82a4af194f9b","0xab1c5ed5da6d8118", ++ "0xd807aa98a3030242","0x12835b0145706fbe", ++ "0x243185be4ee4b28c","0x550c7dc3d5ffb4e2", ++ "0x72be5d74f27b896f","0x80deb1fe3b1696b1", ++ "0x9bdc06a725c71235","0xc19bf174cf692694", ++ "0xe49b69c19ef14ad2","0xefbe4786384f25e3", ++ "0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65", ++ "0x2de92c6f592b0275","0x4a7484aa6ea6e483", ++ "0x5cb0a9dcbd41fbd4","0x76f988da831153b5", ++ "0x983e5152ee66dfab","0xa831c66d2db43210", ++ "0xb00327c898fb213f","0xbf597fc7beef0ee4", ++ "0xc6e00bf33da88fc2","0xd5a79147930aa725", ++ "0x06ca6351e003826f","0x142929670a0e6e70", ++ "0x27b70a8546d22ffc","0x2e1b21385c26c926", ++ "0x4d2c6dfc5ac42aed","0x53380d139d95b3df", ++ "0x650a73548baf63de","0x766a0abb3c77b2a8", ++ "0x81c2c92e47edaee6","0x92722c851482353b", ++ "0xa2bfe8a14cf10364","0xa81a664bbc423001", ++ "0xc24b8b70d0f89791","0xc76c51a30654be30", ++ "0xd192e819d6ef5218","0xd69906245565a910", ++ "0xf40e35855771202a","0x106aa07032bbd1b8", ++ "0x19a4c116b8d2d0c8","0x1e376c085141ab53", ++ "0x2748774cdf8eeb99","0x34b0bcb5e19b48a8", ++ "0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb", ++ "0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3", ++ "0x748f82ee5defb2fc","0x78a5636f43172f60", ++ "0x84c87814a1f0ab72","0x8cc702081a6439ec", ++ "0x90befffa23631e28","0xa4506cebde82bde9", ++ "0xbef9a3f7b2c67915","0xc67178f2e372532b", ++ "0xca273eceea26619c","0xd186b8c721c0c207", ++ "0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178", ++ "0x06f067aa72176fba","0x0a637dc5a2c898a6", ++ "0x113f9804bef90dae","0x1b710b35131c471b", ++ "0x28db77f523047d84","0x32caab7b40c72493", ++ "0x3c9ebe0a15c9bebc","0x431d67c49c100d4c", ++ "0x4cc5d4becb3e42b6","0x597f299cfc657e2a", ++ "0x5fcb6fab3ad6faec","0x6c44198c4a475817","0"); ++$code.=<<___ if (!$LENDIAN); ++.quad 0x0001020304050607,0x1011121314151617 ++___ ++$code.=<<___ if ($LENDIAN); # quad-swapped ++.quad 0x1011121314151617,0x0001020304050607 ++___ ++} else { ++ local *table = sub { ++ foreach(@_) { $code.=".long $_,$_,$_,$_\n"; } ++ }; ++ table( ++ "0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5", ++ "0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5", ++ "0xd807aa98","0x12835b01","0x243185be","0x550c7dc3", ++ "0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174", ++ "0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc", ++ "0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da", ++ "0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7", ++ "0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967", ++ "0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13", ++ "0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85", ++ "0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3", ++ "0xd192e819","0xd6990624","0xf40e3585","0x106aa070", ++ "0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5", ++ "0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3", ++ "0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208", ++ "0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0"); ++$code.=<<___ if (!$LENDIAN); ++.long 0x00010203,0x10111213,0x10111213,0x10111213 ++.long 0x00010203,0x04050607,0x10111213,0x10111213 ++.long 0x00010203,0x04050607,0x08090a0b,0x10111213 ++___ ++$code.=<<___ if ($LENDIAN); # word-swapped ++.long 0x10111213,0x10111213,0x10111213,0x00010203 ++.long 
0x10111213,0x10111213,0x04050607,0x00010203
++.long 0x10111213,0x08090a0b,0x04050607,0x00010203
++___
++}
++$code.=<<___;
++.asciz "SHA${bits} for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
++.align 2
++___
++
++$code =~ s/\`([^\`]*)\`/eval $1/gem;
++print $code;
++close STDOUT;
diff --git a/SOURCES/openssl-1.0.1e-rpmbuild.patch b/SOURCES/openssl-1.0.1e-rpmbuild.patch
new file mode 100644
index 0000000..14b2ba9
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-rpmbuild.patch
@@ -0,0 +1,112 @@
+diff -up openssl-1.0.1e/Configure.rpmbuild openssl-1.0.1e/Configure
+--- openssl-1.0.1e/Configure.rpmbuild 2014-08-13 19:19:53.211005598 +0200
++++ openssl-1.0.1e/Configure 2014-08-13 19:29:21.704099285 +0200
+@@ -345,24 +345,24 @@ my %table=(
+ ####
+ # *-generic* is endian-neutral target, but ./config is free to
+ # throw in -D[BL]_ENDIAN, whichever appropriate...
+-"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+-"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-generic32","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
++"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
+ # It's believed that majority of ARM toolchains predefine appropriate -march.
+ # If you compiler does not, do complement config command line with one!
+-"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-armv4", "gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
+ #### IA-32 targets...
+ "linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +-"linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", ++"linux-elf", "gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", + "linux-aout", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -march=i486 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:a.out", + #### +-"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +-"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", +-"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::", +-"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", ++"linux-generic64","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", ++"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", ++"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64le:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", ++"linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", + "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +-"linux-x86_64", "gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", +-"linux64-s390x", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", ++"linux-x86_64", "gcc:-m64 -DL_ENDIAN -DTERMIO -Wall 
\$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", ++"linux64-s390x", "gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", + #### So called "highgprs" target for z/Architecture CPUs + # "Highgprs" is kernel feature first implemented in Linux 2.6.32, see + # /proc/cpuinfo. The idea is to preserve most significant bits of +@@ -376,16 +376,17 @@ my %table=( + # ldconfig and run-time linker to autodiscover. Unfortunately it + # doesn't work just yet, because of couple of bugs in glibc + # sysdeps/s390/dl-procinfo.c affecting ldconfig and ld.so.1... +-"linux32-s390x", "gcc:-m31 -Wa,-mzarch -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$s390x_asm;$asm=~s/bn\-s390x\.o/bn_asm.o/;$asm}.":31:dlfcn:linux-shared:-fPIC:-m31:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/highgprs", ++"linux32-s390x", "gcc:-m31 -Wa,-mzarch -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$s390x_asm;$asm=~s/bn\-s390x\.o/bn_asm.o/;$asm}.":31:dlfcn:linux-shared:-fPIC:-m31 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::/highgprs", + #### SPARC Linux setups + # Ray Miller has patiently + # assisted with debugging of following two configs. +-"linux-sparcv8","gcc:-mv8 -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -DBN_DIV2W::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", ++"linux-sparcv8","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS) -DBN_DIV2W::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", + # it's a real mess with -mcpu=ultrasparc option under Linux, but + # -Wa,-Av8plus should do the trick no matter what. 
+-"linux-sparcv9","gcc:-m32 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -Wa,-Av8plus -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", ++"linux-sparcv9","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS) -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", + # GCC 3.1 is a requirement +-"linux64-sparcv9","gcc:-m64 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", ++"linux64-sparcv9","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT:ULTRASPARC:-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", ++"linux-aarch64","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64", + #### Alpha Linux with GNU C and Compaq C setups + # Special notes: + # - linux-alpha+bwx-gcc is ment to be used from ./config only. If you +@@ -399,8 +400,8 @@ my %table=( + # + # + # +-"linux-alpha-gcc","gcc:-O3 -DL_ENDIAN -DTERMIO::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +-"linux-alpha+bwx-gcc","gcc:-O3 -DL_ENDIAN -DTERMIO::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", ++"linux-alpha-gcc","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", ++"linux-alpha+bwx-gcc","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)", + "linux-alpha-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}", + "linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}", + +@@ -1675,7 +1676,7 @@ while () + elsif ($shared_extension ne "" && $shared_extension =~ /^\.s([ol])\.[^\.]*\.[^\.]*$/) + { + my $sotmp = $1; +- s/^SHARED_LIBS_LINK_EXTS=.*/SHARED_LIBS_LINK_EXTS=.s$sotmp.\$(SHLIB_MAJOR) .s$sotmp/; ++ s/^SHARED_LIBS_LINK_EXTS=.*/SHARED_LIBS_LINK_EXTS=.s$sotmp.\$(SHLIB_SONAMEVER) .s$sotmp/; + } + elsif ($shared_extension ne "" && $shared_extension =~ /^\.[^\.]*\.[^\.]*\.dylib$/) + { +diff -up openssl-1.0.1e/Makefile.org.rpmbuild openssl-1.0.1e/Makefile.org +--- openssl-1.0.1e/Makefile.org.rpmbuild 2013-02-11 16:26:04.000000000 +0100 ++++ openssl-1.0.1e/Makefile.org 2014-08-13 19:19:53.218005759 +0200 +@@ -10,6 +10,7 @@ SHLIB_VERSION_HISTORY= + SHLIB_MAJOR= + SHLIB_MINOR= + SHLIB_EXT= ++SHLIB_SONAMEVER=10 + PLATFORM=dist + OPTIONS= 
CONFIGURE_ARGS=
+@@ -333,10 +334,9 @@ clean-shared:
+ link-shared:
+ @ set -e; for i in $(SHLIBDIRS); do \
+ $(MAKE) -f $(HERE)/Makefile.shared -e $(BUILDENV) \
+- LIBNAME=$$i LIBVERSION=$(SHLIB_MAJOR).$(SHLIB_MINOR) \
++ LIBNAME=$$i LIBVERSION=$(SHLIB_SONAMEVER) \
+ LIBCOMPATVERSIONS=";$(SHLIB_VERSION_HISTORY)" \
+ symlink.$(SHLIB_TARGET); \
+- libs="$$libs -l$$i"; \
+ done
+
+ build-shared: do_$(SHLIB_TARGET) link-shared
+@@ -347,7 +347,7 @@ do_$(SHLIB_TARGET):
+ libs="$(LIBKRB5) $$libs"; \
+ fi; \
+ $(CLEARENV) && $(MAKE) -f Makefile.shared -e $(BUILDENV) \
+- LIBNAME=$$i LIBVERSION=$(SHLIB_MAJOR).$(SHLIB_MINOR) \
++ LIBNAME=$$i LIBVERSION=$(SHLIB_SONAMEVER) \
+ LIBCOMPATVERSIONS=";$(SHLIB_VERSION_HISTORY)" \
+ LIBDEPS="$$libs $(EX_LIBS)" \
+ link_a.$(SHLIB_TARGET); \
diff --git a/SOURCES/openssl-1.0.1e-sn-case.patch b/SOURCES/openssl-1.0.1e-sn-case.patch
new file mode 100644
index 0000000..eb5955a
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-sn-case.patch
@@ -0,0 +1,12 @@
+diff -up openssl-1.0.1e/apps/s_server.c.sn-case openssl-1.0.1e/apps/s_server.c
+--- openssl-1.0.1e/apps/s_server.c.sn-case 2014-09-17 15:31:51.000000000 +0200
++++ openssl-1.0.1e/apps/s_server.c 2014-09-17 15:43:04.619321492 +0200
+@@ -744,7 +744,7 @@ static int MS_CALLBACK ssl_servername_cb
+
+ if (servername)
+ {
+- if (strcmp(servername,p->servername))
++ if (strcasecmp(servername,p->servername))
+ return p->extension_error;
+ if (ctx2)
+ {
diff --git a/SOURCES/openssl-1.0.1e-ssl2-no-ec.patch b/SOURCES/openssl-1.0.1e-ssl2-no-ec.patch
new file mode 100644
index 0000000..81ad472
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-ssl2-no-ec.patch
@@ -0,0 +1,17 @@
+diff -up openssl-1.0.1e/ssl/s23_lib.c.ssl2noec openssl-1.0.1e/ssl/s23_lib.c
+--- openssl-1.0.1e/ssl/s23_lib.c.ssl2noec 2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/ssl/s23_lib.c 2014-05-06 15:51:54.053293674 +0200
+@@ -107,6 +107,13 @@ int ssl23_put_cipher_by_char(const SSL_C
+ long l;
+
+ /* We can write SSLv2 and SSLv3 ciphers */
++ /* but no ECC ciphers */
++ if (c->algorithm_mkey == SSL_kECDHr ||
++ c->algorithm_mkey == SSL_kECDHe ||
++ c->algorithm_mkey == SSL_kEECDH ||
++ c->algorithm_auth == SSL_aECDH ||
++ c->algorithm_auth == SSL_aECDSA)
++ return 0;
+ if (p != NULL)
+ {
+ l=c->id;
diff --git a/SOURCES/opensslconf-new.h b/SOURCES/opensslconf-new.h
index cf22738..bd56c73 100644
--- a/SOURCES/opensslconf-new.h
+++ b/SOURCES/opensslconf-new.h
@@ -14,7 +14,12 @@
 #elif defined(__ia64__)
 #include "opensslconf-ia64.h"
 #elif defined(__powerpc64__)
+#include <endian.h>
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 #include "opensslconf-ppc64.h"
+#else
+#include "opensslconf-ppc64le.h"
+#endif
 #elif defined(__powerpc__)
 #include "opensslconf-ppc.h"
 #elif defined(__s390x__)
diff --git a/SPECS/openssl.spec b/SPECS/openssl.spec
index b01854b..4900dd3 100644
--- a/SPECS/openssl.spec
+++ b/SPECS/openssl.spec
@@ -23,7 +23,7 @@
 Summary: Utilities from the general purpose cryptography library with TLS implementation
 Name: openssl
 Version: 1.0.1e
-Release: 34%{?dist}.7
+Release: 42%{?dist}
 Epoch: 1
 # We have to remove certain patented algorithms from the openssl source
 # tarball with the hobble-openssl script which is included below.
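A note on the opensslconf-new.h hunk above: a single installed wrapper header now serves both PowerPC64 byte orders by testing the compiler's predefined __BYTE_ORDER__ macro, with glibc's <endian.h> pulled in for toolchains that spell it differently. A minimal standalone sketch of the same compile-time dispatch, assuming GCC and glibc; illustrative only, not the shipped header:

    /* Compile-time byte-order dispatch in the style of opensslconf-new.h. */
    #include <endian.h>   /* glibc; provides the __BYTE_ORDER fallback spelling */
    #include <stdio.h>

    int main(void)
    {
    #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        /* ppc64 builds land here and would get opensslconf-ppc64.h */
        puts("big-endian");
    #else
        /* ppc64le builds land here and would get opensslconf-ppc64le.h */
        puts("little-endian");
    #endif
        return 0;
    }

Doing the selection at preprocessing time means one openssl-devel header works unchanged for both the ppc64 and ppc64le multilib packages.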
@@ -40,7 +40,7 @@ Source11: README.FIPS Source12: ec_curve.c Source13: ectest.c # Build changes -Patch1: openssl-1.0.1-beta2-rpmbuild.patch +Patch1: openssl-1.0.1e-rpmbuild.patch Patch2: openssl-1.0.1e-defaults.patch Patch4: openssl-1.0.0-beta5-enginesdir.patch Patch5: openssl-0.9.8a-no-rpath.patch @@ -48,6 +48,8 @@ Patch6: openssl-0.9.8b-test-use-localhost.patch Patch7: openssl-1.0.0-timezone.patch Patch8: openssl-1.0.1c-perlfind.patch Patch9: openssl-1.0.1c-aliasing.patch +# This patch must be applied first +Patch10: openssl-1.0.1e-ppc-asm-update.patch # Bug fixes Patch23: openssl-1.0.1c-default-paths.patch Patch24: openssl-1.0.1e-issuer-hash.patch @@ -82,7 +84,10 @@ Patch76: openssl-1.0.1e-new-fips-reqs.patch Patch77: openssl-1.0.1e-weak-ciphers.patch Patch78: openssl-1.0.1e-3des-strength.patch Patch79: openssl-1.0.1e-req-keylen.patch +Patch41: openssl-1.0.1e-ssl2-no-ec.patch +Patch42: openssl-1.0.1e-enc-fail.patch # Backported fixes including security fixes +Patch80: openssl-1.0.1e-evp-wrap.patch Patch81: openssl-1.0.1-beta2-padlock64.patch Patch82: openssl-1.0.1e-backports.patch Patch83: openssl-1.0.1e-bad-mac.patch @@ -99,6 +104,7 @@ Patch93: openssl-1.0.1e-cve-2014-0198.patch Patch94: openssl-1.0.1e-cve-2014-0221.patch Patch95: openssl-1.0.1e-cve-2014-0224.patch Patch96: openssl-1.0.1e-cve-2014-3470.patch +Patch97: openssl-1.0.1e-dtls-ecc-ext.patch Patch100: openssl-1.0.1e-cve-2014-3505.patch Patch101: openssl-1.0.1e-cve-2014-3506.patch Patch102: openssl-1.0.1e-cve-2014-3507.patch @@ -106,9 +112,13 @@ Patch103: openssl-1.0.1e-cve-2014-3508.patch Patch104: openssl-1.0.1e-cve-2014-3509.patch Patch105: openssl-1.0.1e-cve-2014-3510.patch Patch106: openssl-1.0.1e-cve-2014-3511.patch +Patch107: openssl-1.0.1e-doc-ciphersuites.patch +Patch108: openssl-1.0.1e-sn-case.patch +Patch109: openssl-1.0.1e-ecdh-auto.patch Patch110: openssl-1.0.1e-cve-2014-3567.patch Patch111: openssl-1.0.1e-cve-2014-3513.patch Patch112: openssl-1.0.1e-fallback-scsv.patch +Patch113: openssl-1.0.1e-copy-algo.patch Patch114: openssl-1.0.1e-cve-2014-3570.patch Patch115: openssl-1.0.1e-cve-2014-3571.patch Patch116: openssl-1.0.1e-cve-2014-3572.patch @@ -116,6 +126,7 @@ Patch117: openssl-1.0.1e-cve-2014-8275.patch Patch118: openssl-1.0.1e-cve-2015-0204.patch Patch119: openssl-1.0.1e-cve-2015-0205.patch Patch120: openssl-1.0.1e-cve-2015-0206.patch +Patch121: openssl-1.0.1e-cc-reqs.patch License: OpenSSL Group: System Environment/Libraries @@ -187,6 +198,7 @@ from other formats to the formats used by the OpenSSL toolkit. 
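Why the sn-case patch above (strcmp to strcasecmp in ssl_servername_cb) uses a case-insensitive comparison: DNS hostnames compare case-insensitively per RFC 4343, so an SNI value of "Example.ORG" must select the same context as "example.org". A hypothetical standalone demo of the difference; this is not OpenSSL code:

    #include <stdio.h>
    #include <string.h>
    #include <strings.h>   /* strcasecmp() is POSIX, declared here */

    int main(void)
    {
        const char *sni = "Example.ORG", *vhost = "example.org";
        /* strcmp() reports a mismatch, strcasecmp() a match. */
        printf("strcmp:     %s\n", strcmp(sni, vhost)     ? "mismatch" : "match");
        printf("strcasecmp: %s\n", strcasecmp(sni, vhost) ? "mismatch" : "match");
        return 0;
    }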
cp %{SOURCE12} %{SOURCE13} crypto/ec/
 
+%patch10 -p1 -b .ppc-asm
 %patch1 -p1 -b .rpmbuild
 %patch2 -p1 -b .defaults
 %patch4 -p1 -b .enginesdir %{?_rawbuild}
@@ -228,7 +240,10 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch77 -p1 -b .weak-ciphers
 %patch78 -p1 -b .3des-strength
 %patch79 -p1 -b .keylen
+%patch41 -p1 -b .ssl2-noec
+%patch42 -p1 -b .enc-fail
 
+%patch80 -p1 -b .wrap
 %patch81 -p1 -b .padlock64
 %patch82 -p1 -b .backports
 %patch71 -p1 -b .manfix
@@ -246,6 +261,7 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch94 -p1 -b .dtls1-dos
 %patch95 -p1 -b .keying-mitm
 %patch96 -p1 -b .anon-ecdh-dos
+%patch97 -p1 -b .dtls-ecc-ext
 %patch100 -p1 -b .dtls-doublefree
 %patch101 -p1 -b .dtls-sizechecks
 %patch102 -p1 -b .dtls-memleak
@@ -253,9 +269,13 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch104 -p1 -b .tlsext-race
 %patch105 -p1 -b .adh-dos
 %patch106 -p1 -b .frag-downgrade
+%patch107 -p1 -b .doc-ciphersuites
+%patch108 -p1 -b .sn-case
+%patch109 -p1 -b .ecdh-auto
 %patch110 -p1 -b .ticket-leak
 %patch111 -p1 -b .srtp-leak
 %patch112 -p1 -b .fallback-scsv
+%patch113 -p1 -b .copy-algo
 %patch114 -p1 -b .bn-sqr
 %patch115 -p1 -b .dtls1-reads
 %patch116 -p1 -b .ecdh-downgrade
@@ -263,6 +283,7 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch118 -p1 -b .rsa-ephemeral
 %patch119 -p1 -b .dh-unauthenticated
 %patch120 -p1 -b .dtls-rec-leak
+%patch121 -p1 -b .cc-reqs
 
 sed -i 's/SHLIB_VERSION_NUMBER "1.0.0"/SHLIB_VERSION_NUMBER "%{version}"/' crypto/opensslv.h
@@ -306,9 +327,12 @@ sslarch=linux-armv4
 %ifarch sh3 sh4
 sslarch=linux-generic32
 %endif
-%ifarch %{power64}
+%ifarch ppc64 ppc64p7
 sslarch=linux-ppc64
 %endif
+%ifarch ppc64le
+sslarch="linux-ppc64le"
+%endif
 # ia64, x86_64, ppc are OK by default
 
 # Configure the build tree. Override OpenSSL defaults with known-good defaults
@@ -526,7 +550,11 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/fipscanister.*
 %postun libs -p /sbin/ldconfig
 
 %changelog
-* Tue Jan 13 2015 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-34.7
+* Thu Jan 15 2015 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-42
+- test in the non-FIPS RSA keygen for minimal distance of p and q
+  similarly to the FIPS RSA keygen
+
+* Tue Jan 13 2015 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-41
 - fix CVE-2014-3570 - incorrect computation in BN_sqr()
 - fix CVE-2014-3571 - possible crash in dtls1_get_record()
 - fix CVE-2014-3572 - possible downgrade of ECDH ciphersuite to non-PFS state
@@ -536,13 +564,38 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/fipscanister.*
 - fix CVE-2015-0205 - do not allow unauthenticated client DH certificate
 - fix CVE-2015-0206 - possible memory leak when buffering DTLS records
 
-* Wed Oct 15 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-34.6
+* Tue Oct 21 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-40
+- use FIPS approved method for computation of d in RSA
+- copy digest algorithm when handling SNI context switch
+
+* Wed Oct 15 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-39
 - fix CVE-2014-3567 - memory leak when handling session tickets
 - fix CVE-2014-3513 - memory leak in srtp support
 - add support for fallback SCSV to partially mitigate CVE-2014-3566
   (padding attack on SSL3)
 
-* Fri Aug 8 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-34.4
+* Wed Sep 24 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-38
+- do FIPS algorithm selftest before the integrity check
+
+* Thu Sep 18 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-37
+- add support for RFC 5649 (#1119738)
+- do not pass the FIPS integrity check if the .hmac files are empty (#1128849)
+- add ECC TLS extensions to DTLS (#1119803)
+- do not send ECC ciphersuites in SSLv2 client hello (#1090955)
+- properly propagate encryption failure in BIO_f_cipher (#1072439)
+- fix CVE-2014-0224 fix that broke EAP-FAST session resumption support
+- improve documentation of ciphersuites - patch by Hubert Kario (#1108026)
+- use case insensitive comparison for servername in s_server (#1081163)
+- add support for automatic ECDH curve selection on server (#1080128)
+- FIPS mode: make the limitations on DSA, DH, and RSA keygen
+  length enforced only if OPENSSL_ENFORCE_MODULUS_BITS environment
+  variable is set
+
+* Wed Aug 13 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-36
+- add support for ppc64le architecture
+- add Power 8 optimalizations
+
+* Fri Aug 8 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-35
+- fix CVE-2014-3505 - doublefree in DTLS packet processing
+- fix CVE-2014-3506 - avoid memory exhaustion in DTLS
+- fix CVE-2014-3507 - avoid memory leak in DTLS
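On the 1.0.1e-42 changelog entry above: FIPS 186-4 appendix B.3.3 requires the two RSA primes to satisfy |p - q| > 2^(nlen/2 - 100) for an nlen-bit modulus, so that p and q do not share a long common prefix. A rough sketch of that criterion using OpenSSL's BIGNUM API follows; pq_far_enough is a hypothetical helper written for illustration, not the code the patch adds, and the bit-length test is a close approximation of the exact power-of-two comparison:

    /* Check the FIPS 186-4 B.3.3 distance criterion |p - q| > 2^(nlen/2 - 100).
     * Link with -lcrypto. */
    #include <openssl/bn.h>

    int pq_far_enough(const BIGNUM *p, const BIGNUM *q, int nlen)
    {
        int ok = 0;
        BIGNUM *diff = BN_new();
        if (diff == NULL)
            return 0;
        if (BN_sub(diff, p, q)) {
            BN_set_negative(diff, 0);                 /* diff = |p - q| */
            /* BN_num_bits(diff) > k implies diff >= 2^k, the required bound. */
            ok = BN_num_bits(diff) > nlen / 2 - 100;
        }
        BN_free(diff);
        return ok;
    }

A keygen loop would simply discard a candidate q and draw a new one whenever this check fails, which is cheap because random primes of the same size almost always differ early in their leading bits.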