diff --git a/SOURCES/openssl-1.0.1-beta2-rpmbuild.patch b/SOURCES/openssl-1.0.1-beta2-rpmbuild.patch
deleted file mode 100644
index a4bb691..0000000
--- a/SOURCES/openssl-1.0.1-beta2-rpmbuild.patch
+++ /dev/null
@@ -1,110 +0,0 @@
-diff -up openssl-1.0.1-beta2/Configure.rpmbuild openssl-1.0.1-beta2/Configure
---- openssl-1.0.1-beta2/Configure.rpmbuild	2012-01-05 01:07:34.000000000 +0100
-+++ openssl-1.0.1-beta2/Configure	2012-02-02 12:43:56.547409325 +0100
-@@ -343,23 +343,23 @@ my %table=(
- ####
- # *-generic* is endian-neutral target, but ./config is free to
- # throw in -D[BL]_ENDIAN, whichever appropriate...
--"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
--"linux-ppc",	"gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-+"linux-generic32","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
-+"linux-ppc",	"gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
- # It's believed that majority of ARM toolchains predefine appropriate -march.
- # If you compiler does not, do complement config command line with one!
--"linux-armv4",	"gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-+"linux-armv4",	"gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
- #### IA-32 targets...
- "linux-ia32-icc",	"icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
--"linux-elf",	"gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-+"linux-elf",	"gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
- "linux-aout",	"gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -march=i486 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:a.out",
- ####
--"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
--"linux-ppc64",	"gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
--"linux-ia64",	"gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-+"linux-generic64","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
-+"linux-ppc64",	"gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
-+"linux-ia64",	"gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
- "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
- "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
--"linux-x86_64",	"gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
--"linux64-s390x",	"gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
-+"linux-x86_64",	"gcc:-m64 -DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
-+"linux64-s390x",	"gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
- #### So called "highgprs" target for z/Architecture CPUs
- # "Highgprs" is kernel feature first implemented in Linux 2.6.32, see
- # /proc/cpuinfo. The idea is to preserve most significant bits of
-@@ -373,16 +373,17 @@ my %table=(
- # ldconfig and run-time linker to autodiscover. Unfortunately it
- # doesn't work just yet, because of couple of bugs in glibc
- # sysdeps/s390/dl-procinfo.c affecting ldconfig and ld.so.1...
--"linux32-s390x",	"gcc:-m31 -Wa,-mzarch -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$s390x_asm;$asm=~s/bn\-s390x\.o/bn_asm.o/;$asm}.":31:dlfcn:linux-shared:-fPIC:-m31:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/highgprs",
-+"linux32-s390x",	"gcc:-m31 -Wa,-mzarch -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$s390x_asm;$asm=~s/bn\-s390x\.o/bn_asm.o/;$asm}.":31:dlfcn:linux-shared:-fPIC:-m31 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::/highgprs",
- #### SPARC Linux setups
- # Ray Miller <ray.miller@computing-services.oxford.ac.uk> has patiently
- # assisted with debugging of following two configs.
--"linux-sparcv8","gcc:-mv8 -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -DBN_DIV2W::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-+"linux-sparcv8","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS) -DBN_DIV2W::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
- # it's a real mess with -mcpu=ultrasparc option under Linux, but
- # -Wa,-Av8plus should do the trick no matter what.
--"linux-sparcv9","gcc:-m32 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -Wa,-Av8plus -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-+"linux-sparcv9","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS) -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
- # GCC 3.1 is a requirement
--"linux64-sparcv9","gcc:-m64 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
-+"linux64-sparcv9","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT:ULTRASPARC:-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
-+"linux-aarch64","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
- #### Alpha Linux with GNU C and Compaq C setups
- # Special notes:
- # - linux-alpha+bwx-gcc is ment to be used from ./config only. If you
-@@ -396,8 +397,8 @@ my %table=(
- #
- #					<appro@fy.chalmers.se>
- #
--"linux-alpha-gcc","gcc:-O3 -DL_ENDIAN -DTERMIO::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
--"linux-alpha+bwx-gcc","gcc:-O3 -DL_ENDIAN -DTERMIO::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-+"linux-alpha-gcc","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
-+"linux-alpha+bwx-gcc","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
- "linux-alpha-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
- "linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
- 
-@@ -1678,7 +1679,7 @@ while (<IN>)
- 	elsif ($shared_extension ne "" && $shared_extension =~ /^\.s([ol])\.[^\.]*\.[^\.]*$/)
- 		{
- 		my $sotmp = $1;
--		s/^SHARED_LIBS_LINK_EXTS=.*/SHARED_LIBS_LINK_EXTS=.s$sotmp.\$(SHLIB_MAJOR) .s$sotmp/;
-+		s/^SHARED_LIBS_LINK_EXTS=.*/SHARED_LIBS_LINK_EXTS=.s$sotmp.\$(SHLIB_SONAMEVER) .s$sotmp/;
- 		}
- 	elsif ($shared_extension ne "" && $shared_extension =~ /^\.[^\.]*\.[^\.]*\.dylib$/)
- 		{
-diff -up openssl-1.0.1-beta2/Makefile.org.rpmbuild openssl-1.0.1-beta2/Makefile.org
---- openssl-1.0.1-beta2/Makefile.org.rpmbuild	2011-12-27 16:17:50.000000000 +0100
-+++ openssl-1.0.1-beta2/Makefile.org	2012-02-02 12:30:23.652495435 +0100
-@@ -10,6 +10,7 @@ SHLIB_VERSION_HISTORY=
- SHLIB_MAJOR=
- SHLIB_MINOR=
- SHLIB_EXT=
-+SHLIB_SONAMEVER=10
- PLATFORM=dist
- OPTIONS=
- CONFIGURE_ARGS=
-@@ -333,10 +334,9 @@ clean-shared:
- link-shared:
- 	@ set -e; for i in $(SHLIBDIRS); do \
- 		$(MAKE) -f $(HERE)/Makefile.shared -e $(BUILDENV) \
--			LIBNAME=$$i LIBVERSION=$(SHLIB_MAJOR).$(SHLIB_MINOR) \
-+			LIBNAME=$$i LIBVERSION=$(SHLIB_SONAMEVER) \
- 			LIBCOMPATVERSIONS=";$(SHLIB_VERSION_HISTORY)" \
- 			symlink.$(SHLIB_TARGET); \
--		libs="$$libs -l$$i"; \
- 	done
- 
- build-shared: do_$(SHLIB_TARGET) link-shared
-@@ -347,7 +347,7 @@ do_$(SHLIB_TARGET):
- 			libs="$(LIBKRB5) $$libs"; \
- 		fi; \
- 		$(CLEARENV) && $(MAKE) -f Makefile.shared -e $(BUILDENV) \
--			LIBNAME=$$i LIBVERSION=$(SHLIB_MAJOR).$(SHLIB_MINOR) \
-+			LIBNAME=$$i LIBVERSION=$(SHLIB_SONAMEVER) \
- 			LIBCOMPATVERSIONS=";$(SHLIB_VERSION_HISTORY)" \
- 			LIBDEPS="$$libs $(EX_LIBS)" \
- 			link_a.$(SHLIB_TARGET); \
diff --git a/SOURCES/openssl-1.0.1e-cc-reqs.patch b/SOURCES/openssl-1.0.1e-cc-reqs.patch
new file mode 100644
index 0000000..e266bb1
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-cc-reqs.patch
@@ -0,0 +1,25 @@
+diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.cc-reqs openssl-1.0.1e/crypto/rsa/rsa_gen.c
+--- openssl-1.0.1e/crypto/rsa/rsa_gen.c.cc-reqs	2015-01-13 12:45:51.000000000 +0100
++++ openssl-1.0.1e/crypto/rsa/rsa_gen.c	2015-01-15 17:35:04.649697922 +0100
+@@ -438,6 +438,10 @@ static int rsa_builtin_keygen(RSA *rsa,
+ 	if(!rsa->dmq1 && ((rsa->dmq1=BN_new()) == NULL)) goto err;
+ 	if(!rsa->iqmp && ((rsa->iqmp=BN_new()) == NULL)) goto err;
+ 
++	/* prepare minimum p and q difference */
++	if (!BN_one(r3)) goto err;
++	if (bitsp > 100 && !BN_lshift(r3, r3, bitsp - 100)) goto err;
++
+ 	BN_copy(rsa->e, e_value);
+ 
+ 	/* generate p and q */
+@@ -463,7 +467,9 @@ static int rsa_builtin_keygen(RSA *rsa,
+ 			{
+ 			if(!BN_generate_prime_ex(rsa->q, bitsq, 0, NULL, NULL, cb))
+ 				goto err;
+-			} while((BN_cmp(rsa->p, rsa->q) == 0) && (++degenerate < 3));
++	       		if (!BN_sub(r2, rsa->q, rsa->p))
++				goto err;
++			} while((BN_ucmp(r2, r3) <= 0) && (++degenerate < 3));
+ 		if(degenerate == 3)
+ 			{
+ 			ok = 0; /* we set our own err */
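
The hunk above tightens RSA key generation: rather than only rejecting q == p, it regenerates q until the primes differ by more than 2^(bitsp - 100), the FIPS 186-4 style minimum p/q distance; r3 holds the threshold and r2 the actual difference. A minimal standalone sketch of the same test against the public BN API (pq_far_enough() is a hypothetical helper, error handling trimmed; in the patch the equivalent logic runs inside rsa_builtin_keygen()):

#include <openssl/bn.h>

static int pq_far_enough(const BIGNUM *p, const BIGNUM *q, int bitsp)
	{
	int ok = 0;
	BIGNUM *diff = BN_new();	/* plays the role of r2 */
	BIGNUM *limit = BN_new();	/* plays the role of r3 = 2^(bitsp - 100) */

	if (!diff || !limit)
		goto err;
	if (!BN_one(limit))
		goto err;
	if (bitsp > 100 && !BN_lshift(limit, limit, bitsp - 100))
		goto err;
	if (!BN_sub(diff, q, p))
		goto err;
	/* BN_ucmp compares absolute values, so the sign of q - p is irrelevant */
	ok = BN_ucmp(diff, limit) > 0;
err:
	BN_free(diff);
	BN_free(limit);
	return ok;
	}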
diff --git a/SOURCES/openssl-1.0.1e-copy-algo.patch b/SOURCES/openssl-1.0.1e-copy-algo.patch
new file mode 100644
index 0000000..927c584
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-copy-algo.patch
@@ -0,0 +1,33 @@
+diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c
+index 6a33b9d..76a5f9e 100644
+--- a/ssl/ssl_lib.c
++++ b/ssl/ssl_lib.c
+@@ -3177,15 +3177,26 @@ SSL_CTX *SSL_get_SSL_CTX(const SSL *ssl)
+ 
+ SSL_CTX *SSL_set_SSL_CTX(SSL *ssl, SSL_CTX* ctx)
+ 	{
++	CERT *ocert = ssl->cert;
+ 	if (ssl->ctx == ctx)
+ 		return ssl->ctx;
+ #ifndef OPENSSL_NO_TLSEXT
+ 	if (ctx == NULL)
+ 		ctx = ssl->initial_ctx;
+ #endif
+-	if (ssl->cert != NULL)
+-		ssl_cert_free(ssl->cert);
+ 	ssl->cert = ssl_cert_dup(ctx->cert);
++	if (ocert)
++		{
++		int i;
++		/* Copy negotiated digests from original */
++		for (i = 0; i < SSL_PKEY_NUM; i++)
++			{
++			CERT_PKEY *cpk = ocert->pkeys + i;
++			CERT_PKEY *rpk = ssl->cert->pkeys + i;
++			rpk->digest = cpk->digest;
++			}
++		ssl_cert_free(ocert);
++		}
+ 	CRYPTO_add(&ctx->references,1,CRYPTO_LOCK_SSL_CTX);
+ 	if (ssl->ctx != NULL)
+ 		SSL_CTX_free(ssl->ctx); /* decrement reference count */
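
SSL_set_SSL_CTX() is what an SNI servername callback calls to move an in-progress handshake onto a per-hostname SSL_CTX; without the fix above, the freshly duplicated cert loses the digests already negotiated against the original certificate, which can break signing later in the handshake. A sketch of the usual caller, assuming a hypothetical ctx_for_host() lookup (the callback shape itself is the stock OpenSSL one):

#include <openssl/ssl.h>

extern SSL_CTX *ctx_for_host(const char *host);	/* hypothetical lookup */

static int servername_cb(SSL *s, int *ad, void *arg)
	{
	const char *host = SSL_get_servername(s, TLSEXT_NAMETYPE_host_name);
	SSL_CTX *vctx;

	(void)ad;
	(void)arg;
	if (host != NULL && (vctx = ctx_for_host(host)) != NULL)
		SSL_set_SSL_CTX(s, vctx);	/* digests now survive the swap */
	return SSL_TLSEXT_ERR_OK;
	}

The callback would be registered with SSL_CTX_set_tlsext_servername_callback(default_ctx, servername_cb).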
diff --git a/SOURCES/openssl-1.0.1e-cve-2014-0224.patch b/SOURCES/openssl-1.0.1e-cve-2014-0224.patch
index 173f0e1..05e7e79 100644
--- a/SOURCES/openssl-1.0.1e-cve-2014-0224.patch
+++ b/SOURCES/openssl-1.0.1e-cve-2014-0224.patch
@@ -12,7 +12,15 @@ diff -up openssl-1.0.1e/ssl/ssl3.h.keying-mitm openssl-1.0.1e/ssl/ssl3.h
 diff -up openssl-1.0.1e/ssl/s3_clnt.c.keying-mitm openssl-1.0.1e/ssl/s3_clnt.c
 --- openssl-1.0.1e/ssl/s3_clnt.c.keying-mitm	2013-02-11 16:26:04.000000000 +0100
 +++ openssl-1.0.1e/ssl/s3_clnt.c	2014-06-02 19:49:57.042701985 +0200
-@@ -559,6 +559,7 @@ int ssl3_connect(SSL *s)
+@@ -510,6 +510,7 @@ int ssl3_connect(SSL *s)
+ 				s->method->ssl3_enc->client_finished_label,
+ 				s->method->ssl3_enc->client_finished_label_len);
+ 			if (ret <= 0) goto end;
++			s->s3->flags |= SSL3_FLAGS_CCS_OK;
+ 			s->state=SSL3_ST_CW_FLUSH;
+ 
+ 			/* clear flags */
+@@ -559,6 +560,7 @@ int ssl3_connect(SSL *s)
  		case SSL3_ST_CR_FINISHED_A:
  		case SSL3_ST_CR_FINISHED_B:
  
@@ -20,7 +28,15 @@ diff -up openssl-1.0.1e/ssl/s3_clnt.c.keying-mitm openssl-1.0.1e/ssl/s3_clnt.c
  			ret=ssl3_get_finished(s,SSL3_ST_CR_FINISHED_A,
  				SSL3_ST_CR_FINISHED_B);
  			if (ret <= 0) goto end;
-@@ -916,6 +917,7 @@ int ssl3_get_server_hello(SSL *s)
+@@ -901,6 +903,7 @@ int ssl3_get_server_hello(SSL *s)
+ 			{
+ 			s->session->cipher = pref_cipher ?
+ 				pref_cipher : ssl_get_cipher_by_char(s, p+j);
++			s->s3->flags |= SSL3_FLAGS_CCS_OK;
+ 			}
+ 		}
+ #endif /* OPENSSL_NO_TLSEXT */
+@@ -916,6 +918,7 @@ int ssl3_get_server_hello(SSL *s)
  		SSLerr(SSL_F_SSL3_GET_SERVER_HELLO,SSL_R_ATTEMPT_TO_REUSE_SESSION_IN_DIFFERENT_CONTEXT);
  		goto f_err;
  		}
diff --git a/SOURCES/openssl-1.0.1e-doc-ciphersuites.patch b/SOURCES/openssl-1.0.1e-doc-ciphersuites.patch
new file mode 100644
index 0000000..418b9e1
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-doc-ciphersuites.patch
@@ -0,0 +1,304 @@
+From 87887a7a658bf305bfe6619eedcbc6c3972cc188 Mon Sep 17 00:00:00 2001
+From: Hubert Kario <hkario@redhat.com>
+Date: Tue, 10 Jun 2014 14:13:33 +0200
+Subject: [PATCH] backport changes to ciphers(1) man page
+
+Backport of the patch:
+add ECC strings to ciphers(1), point out the difference between DH and ECDH,
+and a few other changes applicable to the 1.0.1 code base.
+
+ * Make a clear distinction between DH and ECDH key exchange.
+ * Group all key exchange cipher suite identifiers, first DH then ECDH
+ * add descriptions for all supported *DH* identifiers
+ * add ECDSA authentication descriptions
+ * add example showing how to disable all suites that offer no
+   authentication or encryption
+ * backport listing of elliptic curve cipher suites.
+ * backport listing of TLS 1.2 cipher suites, add note that DH_RSA
+   and DH_DSS is not implemented in this version
+ * backport of description of PSK and listing of PSK cipher suites
+ * backport description of AES128, AES256 and AESGCM options
+ * backport description of CAMELLIA128, CAMELLIA256 options
+---
+ doc/apps/ciphers.pod |  195 ++++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 173 insertions(+), 22 deletions(-)
+
+diff --git a/doc/apps/ciphers.pod b/doc/apps/ciphers.pod
+index f44aa00..6086d0a 100644
+--- a/doc/apps/ciphers.pod
++++ b/doc/apps/ciphers.pod
+@@ -36,7 +36,7 @@ SSL v2 and for SSL v3/TLS v1.
+ 
+ =item B<-V>
+ 
+-Like B<-V>, but include cipher suite codes in output (hex format).
++Like B<-v>, but include cipher suite codes in output (hex format).
+ 
+ =item B<-ssl3>
+ 
+@@ -116,8 +116,8 @@ specified.
+ =item B<COMPLEMENTOFDEFAULT>
+ 
+ the ciphers included in B<ALL>, but not enabled by default. Currently
+-this is B<ADH>. Note that this rule does not cover B<eNULL>, which is
+-not included by B<ALL> (use B<COMPLEMENTOFALL> if necessary).
++this is B<ADH> and B<AECDH>. Note that this rule does not cover B<eNULL>,
++which is not included by B<ALL> (use B<COMPLEMENTOFALL> if necessary).
+ 
+ =item B<ALL>
+ 
+@@ -165,21 +165,58 @@ included.
+ =item B<aNULL>
+ 
+ the cipher suites offering no authentication. This is currently the anonymous
+-DH algorithms. These cipher suites are vulnerable to a "man in the middle"
+-attack and so their use is normally discouraged.
++DH algorithms and anonymous ECDH algorithms. These cipher suites are vulnerable
++to a "man in the middle" attack and so their use is normally discouraged.
+ 
+ =item B<kRSA>, B<RSA>
+ 
+ cipher suites using RSA key exchange.
+ 
++=item B<kDHr>, B<kDHd>, B<kDH>
++
++cipher suites using DH key agreement and DH certificates signed by CAs with RSA
++and DSS keys or either respectively. Not implemented.
++
+ =item B<kEDH>
+ 
+-cipher suites using ephemeral DH key agreement.
++cipher suites using ephemeral DH key agreement, including anonymous cipher
++suites.
+ 
+-=item B<kDHr>, B<kDHd>
++=item B<EDH>
+ 
+-cipher suites using DH key agreement and DH certificates signed by CAs with RSA
+-and DSS keys respectively. Not implemented.
++cipher suites using authenticated ephemeral DH key agreement.
++
++=item B<ADH>
++
++anonymous DH cipher suites, note that this does not include anonymous Elliptic
++Curve DH (ECDH) cipher suites.
++
++=item B<DH>
++
++cipher suites using DH, including anonymous DH, ephemeral DH and fixed DH.
++
++=item B<kECDHr>, B<kECDHe>, B<kECDH>
++
++cipher suites using fixed ECDH key agreement signed by CAs with RSA and ECDSA
++keys or either respectively.
++
++=item B<kEECDH>
++
++cipher suites using ephemeral ECDH key agreement, including anonymous
++cipher suites.
++
++=item B<EECDHE>
++
++cipher suites using authenticated ephemeral ECDH key agreement.
++
++=item B<AECDH>
++
++anonymous Elliptic Curve Diffie Hellman cipher suites.
++
++=item B<ECDH>
++
++cipher suites using ECDH key exchange, including anonymous, ephemeral and
++fixed ECDH.
+ 
+ =item B<aRSA>
+ 
+@@ -194,30 +231,39 @@ cipher suites using DSS authentication, i.e. the certificates carry DSS keys.
+ cipher suites effectively using DH authentication, i.e. the certificates carry
+ DH keys.  Not implemented.
+ 
++=item B<aECDH>
++
++cipher suites effectively using ECDH authentication, i.e. the certificates
++carry ECDH keys.
++
++=item B<aECDSA>, B<ECDSA>
++
++cipher suites using ECDSA authentication, i.e. the certificates carry ECDSA
++keys.
++
+ =item B<kFZA>, B<aFZA>, B<eFZA>, B<FZA>
+ 
+ ciphers suites using FORTEZZA key exchange, authentication, encryption or all
+ FORTEZZA algorithms. Not implemented.
+ 
+-=item B<TLSv1>, B<SSLv3>, B<SSLv2>
+-
+-TLS v1.0, SSL v3.0 or SSL v2.0 cipher suites respectively.
++=item B<TLSv1.2>, B<TLSv1>, B<SSLv3>, B<SSLv2>
+ 
+-=item B<DH>
+-
+-cipher suites using DH, including anonymous DH.
++TLS v1.2, TLS v1.0, SSL v3.0 or SSL v2.0 cipher suites respectively. Note:
++there are no ciphersuites specific to TLS v1.1.
+ 
+-=item B<ADH>
++=item B<AES128>, B<AES256>, B<AES>
+ 
+-anonymous DH cipher suites.
++cipher suites using 128 bit AES, 256 bit AES or either 128 or 256 bit AES.
+ 
+-=item B<AES>
++=item B<AESGCM>
+ 
+-cipher suites using AES.
++AES in Galois Counter Mode (GCM): these ciphersuites are only supported
++in TLS v1.2.
+ 
+-=item B<CAMELLIA>
++=item B<CAMELLIA128>, B<CAMELLIA256>, B<CAMELLIA>
+ 
+-cipher suites using Camellia.
++cipher suites using 128 bit CAMELLIA, 256 bit CAMELLIA or either 128 or 256 bit
++CAMELLIA.
+ 
+ =item B<3DES>
+ 
+@@ -251,6 +297,10 @@ cipher suites using MD5.
+ 
+ cipher suites using SHA1.
+ 
++=item B<SHA256>, B<SHA384>
++
++ciphersuites using SHA256 or SHA384.
++
+ =item B<aGOST>
+ 
+ cipher suites using GOST R 34.10 (either 2001 or 94) for authenticaction
+@@ -277,6 +327,9 @@ cipher suites, using HMAC based on GOST R 34.11-94.
+ 
+ cipher suites using GOST 28147-89 MAC B<instead of> HMAC.
+ 
++=item B<PSK>
++
++cipher suites using pre-shared keys (PSK).
+ 
+ =back
+ 
+@@ -423,7 +476,100 @@ Note: these ciphers can also be used in SSL v3.
+  TLS_DHE_DSS_EXPORT1024_WITH_RC4_56_SHA  EXP1024-DHE-DSS-RC4-SHA
+  TLS_DHE_DSS_WITH_RC4_128_SHA            DHE-DSS-RC4-SHA
+ 
+-=head2 SSL v2.0 cipher suites.
++=head2 Elliptic curve cipher suites.
++
++ TLS_ECDH_RSA_WITH_NULL_SHA              ECDH-RSA-NULL-SHA
++ TLS_ECDH_RSA_WITH_RC4_128_SHA           ECDH-RSA-RC4-SHA
++ TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA      ECDH-RSA-DES-CBC3-SHA
++ TLS_ECDH_RSA_WITH_AES_128_CBC_SHA       ECDH-RSA-AES128-SHA
++ TLS_ECDH_RSA_WITH_AES_256_CBC_SHA       ECDH-RSA-AES256-SHA
++
++ TLS_ECDH_ECDSA_WITH_NULL_SHA            ECDH-ECDSA-NULL-SHA
++ TLS_ECDH_ECDSA_WITH_RC4_128_SHA         ECDH-ECDSA-RC4-SHA
++ TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA    ECDH-ECDSA-DES-CBC3-SHA
++ TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA     ECDH-ECDSA-AES128-SHA
++ TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA     ECDH-ECDSA-AES256-SHA
++
++ TLS_ECDHE_RSA_WITH_NULL_SHA             ECDHE-RSA-NULL-SHA
++ TLS_ECDHE_RSA_WITH_RC4_128_SHA          ECDHE-RSA-RC4-SHA
++ TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA     ECDHE-RSA-DES-CBC3-SHA
++ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA      ECDHE-RSA-AES128-SHA
++ TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA      ECDHE-RSA-AES256-SHA
++
++ TLS_ECDHE_ECDSA_WITH_NULL_SHA           ECDHE-ECDSA-NULL-SHA
++ TLS_ECDHE_ECDSA_WITH_RC4_128_SHA        ECDHE-ECDSA-RC4-SHA
++ TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA   ECDHE-ECDSA-DES-CBC3-SHA
++ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA    ECDHE-ECDSA-AES128-SHA
++ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA    ECDHE-ECDSA-AES256-SHA
++
++ TLS_ECDH_anon_WITH_NULL_SHA             AECDH-NULL-SHA
++ TLS_ECDH_anon_WITH_RC4_128_SHA          AECDH-RC4-SHA
++ TLS_ECDH_anon_WITH_3DES_EDE_CBC_SHA     AECDH-DES-CBC3-SHA
++ TLS_ECDH_anon_WITH_AES_128_CBC_SHA      AECDH-AES128-SHA
++ TLS_ECDH_anon_WITH_AES_256_CBC_SHA      AECDH-AES256-SHA
++
++=head2 TLS v1.2 cipher suites
++
++ TLS_RSA_WITH_NULL_SHA256                  NULL-SHA256
++
++ TLS_RSA_WITH_AES_128_CBC_SHA256           AES128-SHA256
++ TLS_RSA_WITH_AES_256_CBC_SHA256           AES256-SHA256
++ TLS_RSA_WITH_AES_128_GCM_SHA256           AES128-GCM-SHA256
++ TLS_RSA_WITH_AES_256_GCM_SHA384           AES256-GCM-SHA384
++
++ TLS_DH_RSA_WITH_AES_128_CBC_SHA256        Not implemented.
++ TLS_DH_RSA_WITH_AES_256_CBC_SHA256        Not implemented.
++ TLS_DH_RSA_WITH_AES_128_GCM_SHA256        Not implemented.
++ TLS_DH_RSA_WITH_AES_256_GCM_SHA384        Not implemented.
++
++ TLS_DH_DSS_WITH_AES_128_CBC_SHA256        Not implemented.
++ TLS_DH_DSS_WITH_AES_256_CBC_SHA256        Not implemented.
++ TLS_DH_DSS_WITH_AES_128_GCM_SHA256        Not implemented.
++ TLS_DH_DSS_WITH_AES_256_GCM_SHA384        Not implemented.
++
++ TLS_DHE_RSA_WITH_AES_128_CBC_SHA256       DHE-RSA-AES128-SHA256
++ TLS_DHE_RSA_WITH_AES_256_CBC_SHA256       DHE-RSA-AES256-SHA256
++ TLS_DHE_RSA_WITH_AES_128_GCM_SHA256       DHE-RSA-AES128-GCM-SHA256
++ TLS_DHE_RSA_WITH_AES_256_GCM_SHA384       DHE-RSA-AES256-GCM-SHA384
++
++ TLS_DHE_DSS_WITH_AES_128_CBC_SHA256       DHE-DSS-AES128-SHA256
++ TLS_DHE_DSS_WITH_AES_256_CBC_SHA256       DHE-DSS-AES256-SHA256
++ TLS_DHE_DSS_WITH_AES_128_GCM_SHA256       DHE-DSS-AES128-GCM-SHA256
++ TLS_DHE_DSS_WITH_AES_256_GCM_SHA384       DHE-DSS-AES256-GCM-SHA384
++
++ TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256      ECDH-RSA-AES128-SHA256
++ TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384      ECDH-RSA-AES256-SHA384
++ TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256      ECDH-RSA-AES128-GCM-SHA256
++ TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384      ECDH-RSA-AES256-GCM-SHA384
++
++ TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256    ECDH-ECDSA-AES128-SHA256
++ TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384    ECDH-ECDSA-AES256-SHA384
++ TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256    ECDH-ECDSA-AES128-GCM-SHA256
++ TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384    ECDH-ECDSA-AES256-GCM-SHA384
++
++ TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256     ECDHE-RSA-AES128-SHA256
++ TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384     ECDHE-RSA-AES256-SHA384
++ TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256     ECDHE-RSA-AES128-GCM-SHA256
++ TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384     ECDHE-RSA-AES256-GCM-SHA384
++
++ TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256   ECDHE-ECDSA-AES128-SHA256
++ TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384   ECDHE-ECDSA-AES256-SHA384
++ TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256   ECDHE-ECDSA-AES128-GCM-SHA256
++ TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384   ECDHE-ECDSA-AES256-GCM-SHA384
++
++ TLS_DH_anon_WITH_AES_128_CBC_SHA256       ADH-AES128-SHA256
++ TLS_DH_anon_WITH_AES_256_CBC_SHA256       ADH-AES256-SHA256
++ TLS_DH_anon_WITH_AES_128_GCM_SHA256       ADH-AES128-GCM-SHA256
++ TLS_DH_anon_WITH_AES_256_GCM_SHA384       ADH-AES256-GCM-SHA384
++
++=head2 Pre-shared keying (PSK) cipher suites
++
++ TLS_PSK_WITH_RC4_128_SHA                  PSK-RC4-SHA
++ TLS_PSK_WITH_3DES_EDE_CBC_SHA             PSK-3DES-EDE-CBC-SHA
++ TLS_PSK_WITH_AES_128_CBC_SHA              PSK-AES128-CBC-SHA
++ TLS_PSK_WITH_AES_256_CBC_SHA              PSK-AES256-CBC-SHA
++
++=head2 Deprecated SSL v2.0 cipher suites.
+ 
+  SSL_CK_RC4_128_WITH_MD5                 RC4-MD5
+  SSL_CK_RC4_128_EXPORT40_WITH_MD5        EXP-RC4-MD5
+@@ -452,6 +598,11 @@ strength:
+ 
+  openssl ciphers -v 'ALL:!ADH:@STRENGTH'
+ 
++Include all ciphers except ones with no encryption (eNULL) or no
++authentication (aNULL):
++
++ openssl ciphers -v 'ALL:!aNULL'
++
+ Include only 3DES ciphers and then place RSA ciphers last:
+ 
+  openssl ciphers -v '3DES:+RSA'
+-- 
+1.7.9.5
+
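
The aliases documented above are consumed by the cipher-string parser, so the same strings work from C through SSL_CTX_set_cipher_list(). A minimal sketch reusing the man page's own example string (harden_ctx() is a hypothetical wrapper):

#include <openssl/ssl.h>

int harden_ctx(SSL_CTX *ctx)
	{
	/* drop suites with no authentication (aNULL), as in the example above */
	return SSL_CTX_set_cipher_list(ctx, "ALL:!aNULL");
	}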
diff --git a/SOURCES/openssl-1.0.1e-dtls-ecc-ext.patch b/SOURCES/openssl-1.0.1e-dtls-ecc-ext.patch
new file mode 100644
index 0000000..2a002cc
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-dtls-ecc-ext.patch
@@ -0,0 +1,119 @@
+From 2054eb771ea29378f90d3a77c2f4015b17de702d Mon Sep 17 00:00:00 2001
+From: "Dr. Stephen Henson" <steve@openssl.org>
+Date: Tue, 15 Jul 2014 12:20:30 +0100
+Subject: [PATCH] Add ECC extensions with DTLS.
+
+PR#3449
+---
+ ssl/d1_clnt.c |  8 +++++++-
+ ssl/d1_srvr.c |  5 +++++
+ ssl/t1_lib.c  | 18 ++++++------------
+ 3 files changed, 18 insertions(+), 13 deletions(-)
+
+diff --git a/ssl/d1_clnt.c b/ssl/d1_clnt.c
+index 48e5e06..65dbb4a 100644
+--- a/ssl/d1_clnt.c
++++ b/ssl/d1_clnt.c
+@@ -876,12 +876,18 @@ int dtls1_client_hello(SSL *s)
+ 		*(p++)=0; /* Add the NULL method */
+ 
+ #ifndef OPENSSL_NO_TLSEXT
++		/* TLS extensions*/
++		if (ssl_prepare_clienthello_tlsext(s) <= 0)
++			{
++			SSLerr(SSL_F_DTLS1_CLIENT_HELLO,SSL_R_CLIENTHELLO_TLSEXT);
++			goto err;
++			}
+ 		if ((p = ssl_add_clienthello_tlsext(s, p, buf+SSL3_RT_MAX_PLAIN_LENGTH)) == NULL)
+ 			{
+ 			SSLerr(SSL_F_DTLS1_CLIENT_HELLO,ERR_R_INTERNAL_ERROR);
+ 			goto err;
+ 			}
+-#endif		
++#endif
+ 
+ 		l=(p-d);
+ 		d=buf;
+diff --git a/ssl/d1_srvr.c b/ssl/d1_srvr.c
+index 1384ab0..ef9c347 100644
+--- a/ssl/d1_srvr.c
++++ b/ssl/d1_srvr.c
+@@ -980,6 +980,11 @@ int dtls1_send_server_hello(SSL *s)
+ #endif
+ 
+ #ifndef OPENSSL_NO_TLSEXT
++		if (ssl_prepare_serverhello_tlsext(s) <= 0)
++			{
++			SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO,SSL_R_SERVERHELLO_TLSEXT);
++			return -1;
++			}
+ 		if ((p = ssl_add_serverhello_tlsext(s, p, buf+SSL3_RT_MAX_PLAIN_LENGTH)) == NULL)
+ 			{
+ 			SSLerr(SSL_F_DTLS1_SEND_SERVER_HELLO,ERR_R_INTERNAL_ERROR);
+diff --git a/ssl/t1_lib.c b/ssl/t1_lib.c
+index f6a480d..8167a51 100644
+--- a/ssl/t1_lib.c
++++ b/ssl/t1_lib.c
+@@ -453,8 +453,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, unsigned c
+ #endif
+ 
+ #ifndef OPENSSL_NO_EC
+-	if (s->tlsext_ecpointformatlist != NULL &&
+-	    s->version != DTLS1_VERSION)
++	if (s->tlsext_ecpointformatlist != NULL)
+ 		{
+ 		/* Add TLS extension ECPointFormats to the ClientHello message */
+ 		long lenmax; 
+@@ -473,8 +472,7 @@ unsigned char *ssl_add_clienthello_tlsext(SSL *s, unsigned char *buf, unsigned c
+ 		memcpy(ret, s->tlsext_ecpointformatlist, s->tlsext_ecpointformatlist_length);
+ 		ret+=s->tlsext_ecpointformatlist_length;
+ 		}
+-	if (s->tlsext_ellipticcurvelist != NULL &&
+-	    s->version != DTLS1_VERSION)
++	if (s->tlsext_ellipticcurvelist != NULL)
+ 		{
+ 		/* Add TLS extension EllipticCurves to the ClientHello message */
+ 		long lenmax; 
+@@ -750,8 +748,7 @@ unsigned char *ssl_add_serverhello_tlsext(SSL *s, unsigned char *buf, unsigned c
+         }
+ 
+ #ifndef OPENSSL_NO_EC
+-	if (s->tlsext_ecpointformatlist != NULL &&
+-	    s->version != DTLS1_VERSION)
++	if (s->tlsext_ecpointformatlist != NULL)
+ 		{
+ 		/* Add TLS extension ECPointFormats to the ServerHello message */
+ 		long lenmax; 
+@@ -1154,8 +1151,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in
+ #endif
+ 
+ #ifndef OPENSSL_NO_EC
+-		else if (type == TLSEXT_TYPE_ec_point_formats &&
+-	             s->version != DTLS1_VERSION)
++		else if (type == TLSEXT_TYPE_ec_point_formats)
+ 			{
+ 			unsigned char *sdata = data;
+ 			int ecpointformatlist_length = *(sdata++);
+@@ -1189,8 +1185,7 @@ int ssl_parse_clienthello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in
+ 			fprintf(stderr,"\n");
+ #endif
+ 			}
+-		else if (type == TLSEXT_TYPE_elliptic_curves &&
+-	             s->version != DTLS1_VERSION)
++		else if (type == TLSEXT_TYPE_elliptic_curves)
+ 			{
+ 			unsigned char *sdata = data;
+ 			int ellipticcurvelist_length = (*(sdata++) << 8);
+@@ -1549,8 +1544,7 @@ int ssl_parse_serverhello_tlsext(SSL *s, unsigned char **p, unsigned char *d, in
+ 			}
+ 
+ #ifndef OPENSSL_NO_EC
+-		else if (type == TLSEXT_TYPE_ec_point_formats &&
+-	             s->version != DTLS1_VERSION)
++		else if (type == TLSEXT_TYPE_ec_point_formats)
+ 			{
+ 			unsigned char *sdata = data;
+ 			int ecpointformatlist_length = *(sdata++);
+-- 
+1.8.3.1
+
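
With the s->version != DTLS1_VERSION checks removed, the ECPointFormats and EllipticCurves extensions are sent and parsed for DTLS handshakes as well, so ECDHE suites become negotiable over DTLS. A client-side sketch that relies on this (make_dtls_ctx() is hypothetical; socket/BIO plumbing and error handling not shown):

#include <openssl/ssl.h>

SSL_CTX *make_dtls_ctx(void)
	{
	SSL_CTX *ctx = SSL_CTX_new(DTLSv1_client_method());

	if (ctx != NULL)
		/* an ECDHE-only list only works over DTLS once the
		 * ECC extensions above are allowed through */
		SSL_CTX_set_cipher_list(ctx, "ECDHE-RSA-AES128-SHA");
	return ctx;
	}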
diff --git a/SOURCES/openssl-1.0.1e-ecdh-auto.patch b/SOURCES/openssl-1.0.1e-ecdh-auto.patch
new file mode 100644
index 0000000..2e6ac57
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-ecdh-auto.patch
@@ -0,0 +1,248 @@
+diff -up openssl-1.0.1e/apps/s_server.c.ecdh-auto openssl-1.0.1e/apps/s_server.c
+--- openssl-1.0.1e/apps/s_server.c.ecdh-auto	2014-09-17 15:52:01.659445244 +0200
++++ openssl-1.0.1e/apps/s_server.c	2014-09-17 16:24:44.378754502 +0200
+@@ -1708,7 +1708,7 @@ bad:
+ 		{
+ 		EC_KEY *ecdh=NULL;
+ 
+-		if (named_curve)
++		if (named_curve && strcmp(named_curve, "auto"))
+ 			{
+ 			int nid = OBJ_sn2nid(named_curve);
+ 
+@@ -1731,6 +1731,8 @@ bad:
+ 			{
+ 			BIO_printf(bio_s_out,"Setting temp ECDH parameters\n");
+ 			}
++		else if (named_curve)
++			SSL_CTX_set_ecdh_auto(ctx, 1);
+ 		else
+ 			{
+ 			BIO_printf(bio_s_out,"Using default temp ECDH parameters\n");
+diff -up openssl-1.0.1e/ssl/ssl_cert.c.ecdh-auto openssl-1.0.1e/ssl/ssl_cert.c
+--- openssl-1.0.1e/ssl/ssl_cert.c.ecdh-auto	2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/ssl/ssl_cert.c	2014-09-17 16:20:24.355884360 +0200
+@@ -270,6 +270,7 @@ CERT *ssl_cert_dup(CERT *cert)
+ 			}
+ 		}
+ 	ret->ecdh_tmp_cb = cert->ecdh_tmp_cb;
++	ret->ecdh_tmp_auto = cert->ecdh_tmp_auto;
+ #endif
+ 
+ 	for (i = 0; i < SSL_PKEY_NUM; i++)
+diff -up openssl-1.0.1e/ssl/ssl.h.ecdh-auto openssl-1.0.1e/ssl/ssl.h
+--- openssl-1.0.1e/ssl/ssl.h.ecdh-auto	2014-09-17 16:20:24.354884336 +0200
++++ openssl-1.0.1e/ssl/ssl.h	2014-09-17 16:49:29.135273514 +0200
+@@ -1563,6 +1563,7 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
+ #define SSL_CTRL_GET_EXTRA_CHAIN_CERTS		82
+ #define SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS	83
+ 
++#define SSL_CTRL_SET_ECDH_AUTO			94
+ #define SSL_CTRL_GET_SERVER_TMP_KEY		109
+ 
+ #define DTLSv1_get_timeout(ssl, arg) \
+@@ -1606,6 +1607,11 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
+ #define SSL_CTX_clear_extra_chain_certs(ctx) \
+ 	SSL_CTX_ctrl(ctx,SSL_CTRL_CLEAR_EXTRA_CHAIN_CERTS,0,NULL)
+ 
++#define SSL_CTX_set_ecdh_auto(ctx, onoff) \
++	SSL_CTX_ctrl(ctx,SSL_CTRL_SET_ECDH_AUTO,onoff,NULL)
++#define SSL_set_ecdh_auto(s, onoff) \
++	SSL_ctrl(s,SSL_CTRL_SET_ECDH_AUTO,onoff,NULL)
++
+ #define SSL_get_server_tmp_key(s, pk) \
+ 	SSL_ctrl(s,SSL_CTRL_GET_SERVER_TMP_KEY,0,pk)
+ 
+diff -up openssl-1.0.1e/ssl/ssl_lib.c.ecdh-auto openssl-1.0.1e/ssl/ssl_lib.c
+--- openssl-1.0.1e/ssl/ssl_lib.c.ecdh-auto	2014-09-17 15:52:01.616444274 +0200
++++ openssl-1.0.1e/ssl/ssl_lib.c	2014-09-17 16:20:24.356884383 +0200
+@@ -2045,7 +2045,7 @@ void ssl_set_cert_masks(CERT *c, const S
+ #endif
+ 
+ #ifndef OPENSSL_NO_ECDH
+-	have_ecdh_tmp=(c->ecdh_tmp != NULL || c->ecdh_tmp_cb != NULL);
++	have_ecdh_tmp=(c->ecdh_tmp || c->ecdh_tmp_cb || c->ecdh_tmp_auto);
+ #endif
+ 	cpk= &(c->pkeys[SSL_PKEY_RSA_ENC]);
+ 	rsa_enc= (cpk->x509 != NULL && cpk->privatekey != NULL);
+diff -up openssl-1.0.1e/ssl/ssl_locl.h.ecdh-auto openssl-1.0.1e/ssl/ssl_locl.h
+--- openssl-1.0.1e/ssl/ssl_locl.h.ecdh-auto	2014-09-17 15:52:01.632444635 +0200
++++ openssl-1.0.1e/ssl/ssl_locl.h	2014-09-17 17:26:29.764405189 +0200
+@@ -511,6 +511,8 @@ typedef struct cert_st
+ 	EC_KEY *ecdh_tmp;
+ 	/* Callback for generating ephemeral ECDH keys */
+ 	EC_KEY *(*ecdh_tmp_cb)(SSL *ssl,int is_export,int keysize);
++	/* Select ECDH parameters automatically */
++	int ecdh_tmp_auto;
+ #endif
+ 
+ 	CERT_PKEY pkeys[SSL_PKEY_NUM];
+@@ -1091,6 +1093,7 @@ SSL_COMP *ssl3_comp_find(STACK_OF(SSL_CO
+ #ifndef OPENSSL_NO_EC
+ int tls1_ec_curve_id2nid(int curve_id);
+ int tls1_ec_nid2curve_id(int nid);
++int tls1_shared_curve(SSL *s, int nmatch);
+ #endif /* OPENSSL_NO_EC */
+ 
+ #ifndef OPENSSL_NO_TLSEXT
+diff -up openssl-1.0.1e/ssl/s3_lib.c.ecdh-auto openssl-1.0.1e/ssl/s3_lib.c
+--- openssl-1.0.1e/ssl/s3_lib.c.ecdh-auto	2014-09-17 16:20:24.352884288 +0200
++++ openssl-1.0.1e/ssl/s3_lib.c	2014-09-17 17:37:26.274226185 +0200
+@@ -3350,6 +3350,12 @@ long ssl3_ctrl(SSL *s, int cmd, long lar
+ #endif
+ 
+ #endif /* !OPENSSL_NO_TLSEXT */
++
++#ifndef OPENSSL_NO_EC
++	case SSL_CTRL_SET_ECDH_AUTO:
++		s->cert->ecdh_tmp_auto = larg;
++		return 1;
++#endif
+ 	case SSL_CTRL_GET_SERVER_TMP_KEY:
+ 		if (s->server || !s->session || !s->session->sess_cert)
+ 			return 0;
+@@ -3651,6 +3657,12 @@ long ssl3_ctx_ctrl(SSL_CTX *ctx, int cmd
+ 		ctx->srp_ctx.strength=larg;
+ 		break;
+ #endif
++
++#ifndef OPENSSL_NO_EC
++	case SSL_CTRL_SET_ECDH_AUTO:
++		ctx->cert->ecdh_tmp_auto = larg;
++		return 1;
++#endif
+ #endif /* !OPENSSL_NO_TLSEXT */
+ 
+ 	/* A Thawte special :-) */
+@@ -4003,6 +4015,14 @@ SSL_CIPHER *ssl3_choose_cipher(SSL *s, S
+ 		if (
+ 			/* if we are considering an ECC cipher suite that uses an ephemeral EC key */
+ 			(alg_k & SSL_kEECDH)
++			&& (s->cert->ecdh_tmp_auto)
++		)
++			{
++			ok = ok && tls1_shared_curve(s, 0);
++			}
++		else if (
++			/* if we are considering an ECC cipher suite that uses an ephemeral EC key */
++			(alg_k & SSL_kEECDH)
+ 			/* and we have an ephemeral EC key */
+ 			&& (s->cert->ecdh_tmp != NULL)
+ 			/* and the client specified an EllipticCurves extension */
+diff -up openssl-1.0.1e/ssl/s3_srvr.c.ecdh-auto openssl-1.0.1e/ssl/s3_srvr.c
+--- openssl-1.0.1e/ssl/s3_srvr.c.ecdh-auto	2014-09-17 15:52:01.644444906 +0200
++++ openssl-1.0.1e/ssl/s3_srvr.c	2014-09-17 16:20:24.353884312 +0200
+@@ -1693,7 +1693,14 @@ int ssl3_send_server_key_exchange(SSL *s
+ 			const EC_GROUP *group;
+ 
+ 			ecdhp=cert->ecdh_tmp;
+-			if ((ecdhp == NULL) && (s->cert->ecdh_tmp_cb != NULL))
++			if (s->cert->ecdh_tmp_auto)
++				{
++				/* Get NID of first shared curve */
++				int nid = tls1_shared_curve(s, 0);
++				if (nid != NID_undef)
++					ecdhp = EC_KEY_new_by_curve_name(nid);
++				}
++			else if ((ecdhp == NULL) && s->cert->ecdh_tmp_cb)
+ 				{
+ 				ecdhp=s->cert->ecdh_tmp_cb(s,
+ 				      SSL_C_IS_EXPORT(s->s3->tmp.new_cipher),
+@@ -1718,7 +1725,9 @@ int ssl3_send_server_key_exchange(SSL *s
+ 				SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,ERR_R_ECDH_LIB);
+ 				goto err;
+ 				}
+-			if ((ecdh = EC_KEY_dup(ecdhp)) == NULL)
++			if (s->cert->ecdh_tmp_auto)
++				ecdh = ecdhp;
++			else if ((ecdh = EC_KEY_dup(ecdhp)) == NULL)
+ 				{
+ 				SSLerr(SSL_F_SSL3_SEND_SERVER_KEY_EXCHANGE,ERR_R_ECDH_LIB);
+ 				goto err;
+diff -up openssl-1.0.1e/ssl/t1_lib.c.ecdh-auto openssl-1.0.1e/ssl/t1_lib.c
+--- openssl-1.0.1e/ssl/t1_lib.c.ecdh-auto	2014-09-17 16:20:24.358884427 +0200
++++ openssl-1.0.1e/ssl/t1_lib.c	2014-09-17 17:32:04.054951942 +0200
+@@ -202,6 +202,13 @@ static int nid_list[] =
+ 		NID_secp521r1  /* secp521r1 (25) */	
+ 	};
+ 
++static const unsigned char eccurves_default[] =
++	{
++		0,23, /* secp256r1 (23) */ 
++		0,24, /* secp384r1 (24) */
++		0,25, /* secp521r1 (25) */	
++	};
++
+ static int pref_list[] =
+ 	{
+ 		NID_secp521r1, /* secp521r1 (25) */	
+@@ -277,6 +284,69 @@ int tls1_ec_nid2curve_id(int nid)
+ 		return 0;
+ 		}
+ 	}
++/* Get curves list, if "sess" is set return client curves otherwise
++ * preferred list
++ */
++static void tls1_get_curvelist(SSL *s, int sess,
++					const unsigned char **pcurves,
++					size_t *pcurveslen)
++	{
++	if (sess)
++		{
++		*pcurves = s->session->tlsext_ellipticcurvelist;
++		*pcurveslen = s->session->tlsext_ellipticcurvelist_length;
++		}
++	else
++		{
++		*pcurves = s->tlsext_ellipticcurvelist;
++		*pcurveslen = s->tlsext_ellipticcurvelist_length;
++		}
++	if (!*pcurves)
++		{
++		*pcurves = eccurves_default;
++		*pcurveslen = sizeof(eccurves_default);
++		}
++	}
++/* Return nth shared curve. If nmatch == -1 return number of
++ * matches.
++ */
++
++int tls1_shared_curve(SSL *s, int nmatch)
++	{
++	const unsigned char *pref, *supp;
++	size_t preflen, supplen, i, j;
++	int k;
++	/* Can't do anything on client side */
++	if (s->server == 0)
++		return -1;
++	tls1_get_curvelist(s, !!(s->options & SSL_OP_CIPHER_SERVER_PREFERENCE),
++				&supp, &supplen);
++	tls1_get_curvelist(s, !(s->options & SSL_OP_CIPHER_SERVER_PREFERENCE),
++				&pref, &preflen);
++	preflen /= 2;
++	supplen /= 2;
++	k = 0;
++	for (i = 0; i < preflen; i++, pref+=2)
++		{
++		const unsigned char *tsupp = supp;
++		for (j = 0; j < supplen; j++, tsupp+=2)
++			{
++			if (pref[0] == tsupp[0] && pref[1] == tsupp[1])
++				{
++				if (nmatch == k)
++					{
++					int id = (pref[0] << 8) | pref[1];
++					return tls1_ec_curve_id2nid(id);
++					}
++				k++;
++				}
++			}
++		}
++	if (nmatch == -1)
++		return k;
++	return 0;
++	}
++
+ #endif /* OPENSSL_NO_EC */
+ 
+ #ifndef OPENSSL_NO_TLSEXT
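
The backported SSL_CTRL_SET_ECDH_AUTO control lets a server ask the library to pick the first curve shared with the client (via the new tls1_shared_curve()) instead of pinning one through SSL_CTX_set_tmp_ecdh(). Usage is a one-liner through the macros the patch adds to ssl.h (enable_auto_ecdh() is a hypothetical wrapper):

#include <openssl/ssl.h>

int enable_auto_ecdh(SSL_CTX *ctx)
	{
	/* replaces an explicit SSL_CTX_set_tmp_ecdh(ctx, ec_key) call */
	return SSL_CTX_set_ecdh_auto(ctx, 1);
	}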
diff --git a/SOURCES/openssl-1.0.1e-enc-fail.patch b/SOURCES/openssl-1.0.1e-enc-fail.patch
new file mode 100644
index 0000000..a5a43f0
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-enc-fail.patch
@@ -0,0 +1,39 @@
+diff -up openssl-1.0.1e/crypto/evp/bio_enc.c.enc-fail openssl-1.0.1e/crypto/evp/bio_enc.c
+--- openssl-1.0.1e/crypto/evp/bio_enc.c.enc-fail	2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/crypto/evp/bio_enc.c	2014-03-04 15:21:12.185821738 +0100
+@@ -198,10 +198,15 @@ static int enc_read(BIO *b, char *out, i
+ 			}
+ 		else
+ 			{
+-			EVP_CipherUpdate(&(ctx->cipher),
++			if (!EVP_CipherUpdate(&(ctx->cipher),
+ 				(unsigned char *)ctx->buf,&ctx->buf_len,
+-				(unsigned char *)&(ctx->buf[BUF_OFFSET]),i);
+-			ctx->cont=1;
++				(unsigned char *)&(ctx->buf[BUF_OFFSET]),i))
++				{
++				ctx->ok = 0;
++				ctx->cont = 0;
++				}
++			else
++				ctx->cont=1;
+ 			/* Note: it is possible for EVP_CipherUpdate to
+ 			 * decrypt zero bytes because this is or looks like
+ 			 * the final block: if this happens we should retry
+@@ -257,9 +262,14 @@ static int enc_write(BIO *b, const char
+ 	while (inl > 0)
+ 		{
+ 		n=(inl > ENC_BLOCK_SIZE)?ENC_BLOCK_SIZE:inl;
+-		EVP_CipherUpdate(&(ctx->cipher),
++		if (!EVP_CipherUpdate(&(ctx->cipher),
+ 			(unsigned char *)ctx->buf,&ctx->buf_len,
+-			(unsigned char *)in,n);
++			(unsigned char *)in,n))
++			{
++			BIO_copy_next_retry(b);
++			ctx->ok = 0;
++			return ret - inl;
++			}
+ 		inl-=n;
+ 		in+=n;
+ 
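
BIO_f_cipher previously ignored a failing EVP_CipherUpdate() and kept streaming; with this patch the failure stops the stream, and a caller can tell a clean EOF from a cipher error via BIO_get_cipher_status(). A sketch of a read loop that checks it (drain_cipher_bio() is hypothetical; BIO chain construction not shown):

#include <openssl/bio.h>
#include <openssl/evp.h>

int drain_cipher_bio(BIO *cbio, unsigned char *buf, int buflen)
	{
	while (BIO_read(cbio, buf, buflen) > 0)
		;	/* consume decrypted plaintext */
	/* zero status now also covers a failed EVP_CipherUpdate() */
	return BIO_get_cipher_status(cbio) ? 0 : -1;
	}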
diff --git a/SOURCES/openssl-1.0.1e-evp-wrap.patch b/SOURCES/openssl-1.0.1e-evp-wrap.patch
new file mode 100644
index 0000000..bf46f6c
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-evp-wrap.patch
@@ -0,0 +1,1239 @@
+diff -up openssl-1.0.1e/crypto/aes/aes_wrap.c.wrap openssl-1.0.1e/crypto/aes/aes_wrap.c
+--- openssl-1.0.1e/crypto/aes/aes_wrap.c.wrap	2013-02-11 16:02:47.000000000 +0100
++++ openssl-1.0.1e/crypto/aes/aes_wrap.c	2014-09-09 16:12:25.852801573 +0200
+@@ -53,207 +53,18 @@
+ 
+ #include "cryptlib.h"
+ #include <openssl/aes.h>
+-#include <openssl/bio.h>
+-
+-static const unsigned char default_iv[] = {
+-  0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
+-};
++#include <openssl/modes.h>
+ 
+ int AES_wrap_key(AES_KEY *key, const unsigned char *iv,
+ 		unsigned char *out,
+ 		const unsigned char *in, unsigned int inlen)
+ 	{
+-	unsigned char *A, B[16], *R;
+-	unsigned int i, j, t;
+-	if ((inlen & 0x7) || (inlen < 8))
+-		return -1;
+-	A = B;
+-	t = 1;
+-	memcpy(out + 8, in, inlen);
+-	if (!iv)
+-		iv = default_iv;
+-
+-	memcpy(A, iv, 8);
+-
+-	for (j = 0; j < 6; j++)
+-		{
+-		R = out + 8;
+-		for (i = 0; i < inlen; i += 8, t++, R += 8)
+-			{
+-			memcpy(B + 8, R, 8);
+-			AES_encrypt(B, B, key);
+-			A[7] ^= (unsigned char)(t & 0xff);
+-			if (t > 0xff)	
+-				{
+-				A[6] ^= (unsigned char)((t >> 8) & 0xff);
+-				A[5] ^= (unsigned char)((t >> 16) & 0xff);
+-				A[4] ^= (unsigned char)((t >> 24) & 0xff);
+-				}
+-			memcpy(R, B + 8, 8);
+-			}
+-		}
+-	memcpy(out, A, 8);
+-	return inlen + 8;
++	return CRYPTO_128_wrap(key, iv, out, in, inlen, (block128_f)AES_encrypt);
+ 	}
+ 
+ int AES_unwrap_key(AES_KEY *key, const unsigned char *iv,
+ 		unsigned char *out,
+ 		const unsigned char *in, unsigned int inlen)
+ 	{
+-	unsigned char *A, B[16], *R;
+-	unsigned int i, j, t;
+-	inlen -= 8;
+-	if (inlen & 0x7)
+-		return -1;
+-	if (inlen < 8)
+-		return -1;
+-	A = B;
+-	t =  6 * (inlen >> 3);
+-	memcpy(A, in, 8);
+-	memcpy(out, in + 8, inlen);
+-	for (j = 0; j < 6; j++)
+-		{
+-		R = out + inlen - 8;
+-		for (i = 0; i < inlen; i += 8, t--, R -= 8)
+-			{
+-			A[7] ^= (unsigned char)(t & 0xff);
+-			if (t > 0xff)	
+-				{
+-				A[6] ^= (unsigned char)((t >> 8) & 0xff);
+-				A[5] ^= (unsigned char)((t >> 16) & 0xff);
+-				A[4] ^= (unsigned char)((t >> 24) & 0xff);
+-				}
+-			memcpy(B + 8, R, 8);
+-			AES_decrypt(B, B, key);
+-			memcpy(R, B + 8, 8);
+-			}
+-		}
+-	if (!iv)
+-		iv = default_iv;
+-	if (memcmp(A, iv, 8))
+-		{
+-		OPENSSL_cleanse(out, inlen);
+-		return 0;
+-		}
+-	return inlen;
+-	}
+-
+-#ifdef AES_WRAP_TEST
+-
+-int AES_wrap_unwrap_test(const unsigned char *kek, int keybits,
+-			 const unsigned char *iv,
+-			 const unsigned char *eout,
+-			 const unsigned char *key, int keylen)
+-	{
+-	unsigned char *otmp = NULL, *ptmp = NULL;
+-	int r, ret = 0;
+-	AES_KEY wctx;
+-	otmp = OPENSSL_malloc(keylen + 8);
+-	ptmp = OPENSSL_malloc(keylen);
+-	if (!otmp || !ptmp)
+-		return 0;
+-	if (AES_set_encrypt_key(kek, keybits, &wctx))
+-		goto err;
+-	r = AES_wrap_key(&wctx, iv, otmp, key, keylen);
+-	if (r <= 0)
+-		goto err;
+-
+-	if (eout && memcmp(eout, otmp, keylen))
+-		goto err;
+-		
+-	if (AES_set_decrypt_key(kek, keybits, &wctx))
+-		goto err;
+-	r = AES_unwrap_key(&wctx, iv, ptmp, otmp, r);
+-
+-	if (memcmp(key, ptmp, keylen))
+-		goto err;
+-
+-	ret = 1;
+-
+-	err:
+-	if (otmp)
+-		OPENSSL_free(otmp);
+-	if (ptmp)
+-		OPENSSL_free(ptmp);
+-
+-	return ret;
+-
++	return CRYPTO_128_unwrap(key, iv, out, in, inlen, (block128_f)AES_decrypt);
+ 	}
+-
+-
+-
+-int main(int argc, char **argv)
+-{
+-
+-static const unsigned char kek[] = {
+-  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+-  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+-  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+-  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
+-};
+-
+-static const unsigned char key[] = {
+-  0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+-  0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
+-  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+-  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+-};
+-
+-static const unsigned char e1[] = {
+-  0x1f, 0xa6, 0x8b, 0x0a, 0x81, 0x12, 0xb4, 0x47,
+-  0xae, 0xf3, 0x4b, 0xd8, 0xfb, 0x5a, 0x7b, 0x82,
+-  0x9d, 0x3e, 0x86, 0x23, 0x71, 0xd2, 0xcf, 0xe5
+-};
+-
+-static const unsigned char e2[] = {
+-  0x96, 0x77, 0x8b, 0x25, 0xae, 0x6c, 0xa4, 0x35,
+-  0xf9, 0x2b, 0x5b, 0x97, 0xc0, 0x50, 0xae, 0xd2,
+-  0x46, 0x8a, 0xb8, 0xa1, 0x7a, 0xd8, 0x4e, 0x5d
+-};
+-
+-static const unsigned char e3[] = {
+-  0x64, 0xe8, 0xc3, 0xf9, 0xce, 0x0f, 0x5b, 0xa2,
+-  0x63, 0xe9, 0x77, 0x79, 0x05, 0x81, 0x8a, 0x2a,
+-  0x93, 0xc8, 0x19, 0x1e, 0x7d, 0x6e, 0x8a, 0xe7
+-};
+-
+-static const unsigned char e4[] = {
+-  0x03, 0x1d, 0x33, 0x26, 0x4e, 0x15, 0xd3, 0x32,
+-  0x68, 0xf2, 0x4e, 0xc2, 0x60, 0x74, 0x3e, 0xdc,
+-  0xe1, 0xc6, 0xc7, 0xdd, 0xee, 0x72, 0x5a, 0x93,
+-  0x6b, 0xa8, 0x14, 0x91, 0x5c, 0x67, 0x62, 0xd2
+-};
+-
+-static const unsigned char e5[] = {
+-  0xa8, 0xf9, 0xbc, 0x16, 0x12, 0xc6, 0x8b, 0x3f,
+-  0xf6, 0xe6, 0xf4, 0xfb, 0xe3, 0x0e, 0x71, 0xe4,
+-  0x76, 0x9c, 0x8b, 0x80, 0xa3, 0x2c, 0xb8, 0x95,
+-  0x8c, 0xd5, 0xd1, 0x7d, 0x6b, 0x25, 0x4d, 0xa1
+-};
+-
+-static const unsigned char e6[] = {
+-  0x28, 0xc9, 0xf4, 0x04, 0xc4, 0xb8, 0x10, 0xf4,
+-  0xcb, 0xcc, 0xb3, 0x5c, 0xfb, 0x87, 0xf8, 0x26,
+-  0x3f, 0x57, 0x86, 0xe2, 0xd8, 0x0e, 0xd3, 0x26,
+-  0xcb, 0xc7, 0xf0, 0xe7, 0x1a, 0x99, 0xf4, 0x3b,
+-  0xfb, 0x98, 0x8b, 0x9b, 0x7a, 0x02, 0xdd, 0x21
+-};
+-
+-	AES_KEY wctx, xctx;
+-	int ret;
+-	ret = AES_wrap_unwrap_test(kek, 128, NULL, e1, key, 16);
+-	fprintf(stderr, "Key test result %d\n", ret);
+-	ret = AES_wrap_unwrap_test(kek, 192, NULL, e2, key, 16);
+-	fprintf(stderr, "Key test result %d\n", ret);
+-	ret = AES_wrap_unwrap_test(kek, 256, NULL, e3, key, 16);
+-	fprintf(stderr, "Key test result %d\n", ret);
+-	ret = AES_wrap_unwrap_test(kek, 192, NULL, e4, key, 24);
+-	fprintf(stderr, "Key test result %d\n", ret);
+-	ret = AES_wrap_unwrap_test(kek, 256, NULL, e5, key, 24);
+-	fprintf(stderr, "Key test result %d\n", ret);
+-	ret = AES_wrap_unwrap_test(kek, 256, NULL, e6, key, 32);
+-	fprintf(stderr, "Key test result %d\n", ret);
+-}
+-	
+-	
+-#endif
+diff -up openssl-1.0.1e/crypto/evp/c_allc.c.wrap openssl-1.0.1e/crypto/evp/c_allc.c
+--- openssl-1.0.1e/crypto/evp/c_allc.c.wrap	2014-09-09 16:11:24.103379348 +0200
++++ openssl-1.0.1e/crypto/evp/c_allc.c	2014-09-09 16:12:25.853801601 +0200
+@@ -98,6 +98,7 @@ void OpenSSL_add_all_ciphers(void)
+ 	EVP_add_cipher(EVP_des_ecb());
+ 	EVP_add_cipher(EVP_des_ede());
+ 	EVP_add_cipher(EVP_des_ede3());
++	EVP_add_cipher(EVP_des_ede3_wrap());
+ #endif
+ 
+ #ifndef OPENSSL_NO_RC4
+@@ -177,6 +178,8 @@ void OpenSSL_add_all_ciphers(void)
+ 	EVP_add_cipher(EVP_aes_128_ctr());
+ 	EVP_add_cipher(EVP_aes_128_gcm());
+ 	EVP_add_cipher(EVP_aes_128_xts());
++	EVP_add_cipher(EVP_aes_128_wrap());
++	EVP_add_cipher(EVP_aes_128_wrap_pad());
+ 	EVP_add_cipher_alias(SN_aes_128_cbc,"AES128");
+ 	EVP_add_cipher_alias(SN_aes_128_cbc,"aes128");
+ 	EVP_add_cipher(EVP_aes_192_ecb());
+@@ -187,6 +190,8 @@ void OpenSSL_add_all_ciphers(void)
+ 	EVP_add_cipher(EVP_aes_192_ofb());
+ 	EVP_add_cipher(EVP_aes_192_ctr());
+ 	EVP_add_cipher(EVP_aes_192_gcm());
++	EVP_add_cipher(EVP_aes_192_wrap());
++	EVP_add_cipher(EVP_aes_192_wrap_pad());
+ 	EVP_add_cipher_alias(SN_aes_192_cbc,"AES192");
+ 	EVP_add_cipher_alias(SN_aes_192_cbc,"aes192");
+ 	EVP_add_cipher(EVP_aes_256_ecb());
+@@ -198,6 +203,8 @@ void OpenSSL_add_all_ciphers(void)
+ 	EVP_add_cipher(EVP_aes_256_ctr());
+ 	EVP_add_cipher(EVP_aes_256_gcm());
+ 	EVP_add_cipher(EVP_aes_256_xts());
++	EVP_add_cipher(EVP_aes_256_wrap());
++	EVP_add_cipher(EVP_aes_256_wrap_pad());
+ 	EVP_add_cipher_alias(SN_aes_256_cbc,"AES256");
+ 	EVP_add_cipher_alias(SN_aes_256_cbc,"aes256");
+ #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1)
+@@ -250,6 +257,7 @@ void OpenSSL_add_all_ciphers(void)
+ 
+ 	EVP_add_cipher(EVP_des_ede());
+ 	EVP_add_cipher(EVP_des_ede3());
++	EVP_add_cipher(EVP_des_ede3_wrap());
+ #endif
+ 
+ #ifndef OPENSSL_NO_AES
+@@ -262,6 +270,8 @@ void OpenSSL_add_all_ciphers(void)
+ 	EVP_add_cipher(EVP_aes_128_ctr());
+ 	EVP_add_cipher(EVP_aes_128_gcm());
+ 	EVP_add_cipher(EVP_aes_128_xts());
++	EVP_add_cipher(EVP_aes_128_wrap());
++	EVP_add_cipher(EVP_aes_128_wrap_pad());
+ 	EVP_add_cipher_alias(SN_aes_128_cbc,"AES128");
+ 	EVP_add_cipher_alias(SN_aes_128_cbc,"aes128");
+ 	EVP_add_cipher(EVP_aes_192_ecb());
+@@ -272,6 +282,8 @@ void OpenSSL_add_all_ciphers(void)
+ 	EVP_add_cipher(EVP_aes_192_ofb());
+ 	EVP_add_cipher(EVP_aes_192_ctr());
+ 	EVP_add_cipher(EVP_aes_192_gcm());
++	EVP_add_cipher(EVP_aes_192_wrap());
++	EVP_add_cipher(EVP_aes_192_wrap_pad());
+ 	EVP_add_cipher_alias(SN_aes_192_cbc,"AES192");
+ 	EVP_add_cipher_alias(SN_aes_192_cbc,"aes192");
+ 	EVP_add_cipher(EVP_aes_256_ecb());
+@@ -283,6 +295,8 @@ void OpenSSL_add_all_ciphers(void)
+ 	EVP_add_cipher(EVP_aes_256_ctr());
+ 	EVP_add_cipher(EVP_aes_256_gcm());
+ 	EVP_add_cipher(EVP_aes_256_xts());
++	EVP_add_cipher(EVP_aes_256_wrap());
++	EVP_add_cipher(EVP_aes_256_wrap_pad());
+ 	EVP_add_cipher_alias(SN_aes_256_cbc,"AES256");
+ 	EVP_add_cipher_alias(SN_aes_256_cbc,"aes256");
+ #endif
+diff -up openssl-1.0.1e/crypto/evp/e_aes.c.wrap openssl-1.0.1e/crypto/evp/e_aes.c
+--- openssl-1.0.1e/crypto/evp/e_aes.c.wrap	2014-09-09 16:11:24.103379348 +0200
++++ openssl-1.0.1e/crypto/evp/e_aes.c	2014-09-09 16:12:25.853801601 +0200
+@@ -1,5 +1,5 @@
+ /* ====================================================================
+- * Copyright (c) 2001-2011 The OpenSSL Project.  All rights reserved.
++ * Copyright (c) 2001-2014 The OpenSSL Project.  All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+@@ -1323,4 +1323,180 @@ BLOCK_CIPHER_custom(NID_aes,128,1,12,ccm
+ BLOCK_CIPHER_custom(NID_aes,192,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS)
+ BLOCK_CIPHER_custom(NID_aes,256,1,12,ccm,CCM,EVP_CIPH_FLAG_FIPS|CUSTOM_FLAGS)
+ 
++typedef struct
++	{
++	union { double align; AES_KEY ks; } ks;
++	/* Indicates if IV has been set */
++	unsigned char *iv;
++	} EVP_AES_WRAP_CTX;
++
++static int aes_wrap_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
++                        const unsigned char *iv, int enc)
++	{
++	EVP_AES_WRAP_CTX *wctx = ctx->cipher_data;
++	if (!iv && !key)
++		return 1;
++	if (key)
++		{
++		if (ctx->encrypt)
++			AES_set_encrypt_key(key, ctx->key_len * 8, &wctx->ks.ks);
++		else
++			AES_set_decrypt_key(key, ctx->key_len * 8, &wctx->ks.ks);
++		if (!iv)
++			wctx->iv = NULL;
++		}
++	if (iv)
++		{
++		memcpy(ctx->iv, iv, EVP_CIPHER_CTX_iv_length(ctx));
++		wctx->iv = ctx->iv;
++		}
++	return 1;
++	}
++
++static int aes_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
++		const unsigned char *in, size_t inlen)
++	{
++	EVP_AES_WRAP_CTX *wctx = ctx->cipher_data;
++	size_t rv;
++	/* AES wrap with padding has IV length of 4, without padding 8 */
++	int pad = EVP_CIPHER_CTX_iv_length(ctx) == 4;
++	/* No final operation so always return zero length */
++	if (!in)
++		return 0;
++	/* Input length must always be non-zero */
++	if (!inlen)
++		return -1;
++	/* If decrypting need at least 16 bytes and multiple of 8 */
++	if (!ctx->encrypt && (inlen < 16 || inlen & 0x7))
++		return -1;
++	/* If not padding input must be multiple of 8 */
++	if (!pad && inlen & 0x7)
++		return -1;
++	if (!out)
++		{
++		if (ctx->encrypt)
++			{
++			/* If padding round up to multiple of 8 */
++			if (pad)
++				inlen = (inlen + 7)/8 * 8;
++			/* 8 byte prefix */
++			return inlen + 8;
++			}
++		else
++			{
++			/* If not padding output will be exactly 8 bytes
++			 * smaller than input. If padding it will be at
++			 * least 8 bytes smaller but we don't know how
++			 * much.
++			 */
++			return inlen - 8;
++			}
++		}
++	if (pad)
++		{
++		if (ctx->encrypt)
++			rv = CRYPTO_128_wrap_pad(&wctx->ks.ks, wctx->iv,
++						out, in, inlen,
++						(block128_f)AES_encrypt);
++		else
++			rv = CRYPTO_128_unwrap_pad(&wctx->ks.ks, wctx->iv,
++						out, in, inlen,
++						(block128_f)AES_decrypt);
++		}
++	else
++		{
++		if (ctx->encrypt)
++			rv = CRYPTO_128_wrap(&wctx->ks.ks, wctx->iv,
++						out, in, inlen,
++						(block128_f)AES_encrypt);
++		else
++			rv = CRYPTO_128_unwrap(&wctx->ks.ks, wctx->iv,
++						out, in, inlen,
++						(block128_f)AES_decrypt);
++		}
++	return rv ? (int)rv : -1;
++	}
++
++#define WRAP_FLAGS	(EVP_CIPH_WRAP_MODE | EVP_CIPH_FLAG_FIPS \
++		| EVP_CIPH_CUSTOM_IV | EVP_CIPH_FLAG_CUSTOM_CIPHER \
++		| EVP_CIPH_ALWAYS_CALL_INIT | EVP_CIPH_FLAG_DEFAULT_ASN1)
++
++static const EVP_CIPHER aes_128_wrap = {
++	NID_id_aes128_wrap,
++	8, 16, 8, WRAP_FLAGS,
++	aes_wrap_init_key, aes_wrap_cipher,
++	NULL,
++	sizeof(EVP_AES_WRAP_CTX),
++	NULL,NULL,NULL,NULL };
++
++const EVP_CIPHER *EVP_aes_128_wrap(void)
++	{
++	return &aes_128_wrap;
++	}
++
++static const EVP_CIPHER aes_192_wrap = {
++	NID_id_aes192_wrap,
++	8, 24, 8, WRAP_FLAGS,
++	aes_wrap_init_key, aes_wrap_cipher,
++	NULL,
++	sizeof(EVP_AES_WRAP_CTX),
++	NULL,NULL,NULL,NULL };
++
++const EVP_CIPHER *EVP_aes_192_wrap(void)
++	{
++	return &aes_192_wrap;
++	}
++
++static const EVP_CIPHER aes_256_wrap = {
++	NID_id_aes256_wrap,
++	8, 32, 8, WRAP_FLAGS,
++	aes_wrap_init_key, aes_wrap_cipher,
++	NULL,
++	sizeof(EVP_AES_WRAP_CTX),
++	NULL,NULL,NULL,NULL };
++
++const EVP_CIPHER *EVP_aes_256_wrap(void)
++	{
++	return &aes_256_wrap;
++	}
++
++static const EVP_CIPHER aes_128_wrap_pad = {
++	NID_id_aes128_wrap_pad,
++	8, 16, 4, WRAP_FLAGS,
++	aes_wrap_init_key, aes_wrap_cipher,
++	NULL,
++	sizeof(EVP_AES_WRAP_CTX),
++	NULL,NULL,NULL,NULL };
++
++const EVP_CIPHER *EVP_aes_128_wrap_pad(void)
++	{
++	return &aes_128_wrap_pad;
++	}
++
++static const EVP_CIPHER aes_192_wrap_pad = {
++	NID_id_aes192_wrap_pad,
++	8, 24, 4, WRAP_FLAGS,
++	aes_wrap_init_key, aes_wrap_cipher,
++	NULL,
++	sizeof(EVP_AES_WRAP_CTX),
++	NULL,NULL,NULL,NULL };
++
++const EVP_CIPHER *EVP_aes_192_wrap_pad(void)
++	{
++	return &aes_192_wrap_pad;
++	}
++
++static const EVP_CIPHER aes_256_wrap_pad = {
++	NID_id_aes256_wrap_pad,
++	8, 32, 4, WRAP_FLAGS,
++	aes_wrap_init_key, aes_wrap_cipher,
++	NULL,
++	sizeof(EVP_AES_WRAP_CTX),
++	NULL,NULL,NULL,NULL };
++
++const EVP_CIPHER *EVP_aes_256_wrap_pad(void)
++	{
++	return &aes_256_wrap_pad;
++	}
++
+ #endif
+diff -up openssl-1.0.1e/crypto/evp/e_des3.c.wrap openssl-1.0.1e/crypto/evp/e_des3.c
+--- openssl-1.0.1e/crypto/evp/e_des3.c.wrap	2014-09-09 16:11:24.104379372 +0200
++++ openssl-1.0.1e/crypto/evp/e_des3.c	2014-09-09 16:12:25.854801627 +0200
+@@ -310,4 +310,112 @@ const EVP_CIPHER *EVP_des_ede3(void)
+ {
+ 	return &des_ede3_ecb;
+ }
++
++#ifndef OPENSSL_NO_SHA
++
++#include <openssl/sha.h>
++
++static const unsigned char wrap_iv[8] = {0x4a,0xdd,0xa2,0x2c,0x79,0xe8,0x21,0x05};
++
++static int des_ede3_unwrap(EVP_CIPHER_CTX *ctx, unsigned char *out,
++				const unsigned char *in, size_t inl)
++	{
++	unsigned char icv[8], iv[8], sha1tmp[SHA_DIGEST_LENGTH];
++	int rv = -1;
++	if (inl < 24)
++		return -1;
++	if (!out)
++		return inl - 16;
++	memcpy(ctx->iv, wrap_iv, 8);
++	/* Decrypt first block which will end up as icv */
++	des_ede_cbc_cipher(ctx, icv, in, 8);
++	/* Decrypt central blocks */
++	/* If decrypting in place, move the whole output along a block
++	 * so that the next des_ede_cbc_cipher call is in place.
++	 */
++	if (out == in)
++		{
++		memmove(out, out + 8, inl - 8);
++		in -= 8;
++		}
++	des_ede_cbc_cipher(ctx, out, in + 8, inl - 16);
++	/* Decrypt final block which will be IV */
++	des_ede_cbc_cipher(ctx, iv, in + inl - 8, 8);
++	/* Reverse order of everything */
++	BUF_reverse(icv, NULL, 8);
++	BUF_reverse(out, NULL, inl - 16);
++	BUF_reverse(ctx->iv, iv, 8);
++	/* Decrypt again using new IV */
++	des_ede_cbc_cipher(ctx, out, out, inl - 16);
++	des_ede_cbc_cipher(ctx, icv, icv, 8);
++	/* Work out SHA1 hash of first portion */
++	SHA1(out, inl - 16, sha1tmp);
++
++	if (!CRYPTO_memcmp(sha1tmp, icv, 8))
++		rv = inl - 16;
++	OPENSSL_cleanse(icv, 8);
++	OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH);
++	OPENSSL_cleanse(iv, 8);
++	OPENSSL_cleanse(ctx->iv, 8);
++	if (rv == -1)
++		OPENSSL_cleanse(out, inl - 16);
++	
++	return rv;
++	}
++
++static int des_ede3_wrap(EVP_CIPHER_CTX *ctx, unsigned char *out,
++				const unsigned char *in, size_t inl)
++	{
++	unsigned char sha1tmp[SHA_DIGEST_LENGTH];
++	if (!out)
++		return inl + 16;
++	/* Copy input to output buffer + 8 so we have space for IV */
++	memmove(out + 8, in, inl);
++	/* Work out ICV */
++	SHA1(in, inl, sha1tmp);
++	memcpy(out + inl + 8, sha1tmp, 8);
++	OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH);
++	/* Generate random IV */
++	RAND_bytes(ctx->iv, 8);
++	memcpy(out, ctx->iv, 8);
++	/* Encrypt everything after IV in place */
++	des_ede_cbc_cipher(ctx, out + 8, out + 8, inl + 8);
++	BUF_reverse(out, NULL, inl + 16);
++	memcpy(ctx->iv, wrap_iv, 8);
++	des_ede_cbc_cipher(ctx, out, out, inl + 16);
++	return inl + 16;
++	}
++
++static int des_ede3_wrap_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
++				const unsigned char *in, size_t inl)
++	{
++	/* Sanity check input length: we typically only wrap keys
++	 * so EVP_MAXCHUNK is more than will ever be needed. Also
++	 * the input length must be a multiple of 8 bytes.
++	 */
++	if (inl >= EVP_MAXCHUNK || inl % 8)
++		return -1;
++	if (ctx->encrypt)
++		return des_ede3_wrap(ctx, out, in, inl);
++	else
++		return des_ede3_unwrap(ctx, out, in, inl);
++	}
++
++static const EVP_CIPHER des3_wrap = {
++	NID_id_smime_alg_CMS3DESwrap,
++	8, 24, 0,
++	EVP_CIPH_WRAP_MODE|EVP_CIPH_CUSTOM_IV|EVP_CIPH_FLAG_CUSTOM_CIPHER
++		|EVP_CIPH_FLAG_DEFAULT_ASN1|EVP_CIPH_FLAG_FIPS,
++	des_ede3_init_key, des_ede3_wrap_cipher,
++	NULL,	
++	sizeof(DES_EDE_KEY),
++	NULL,NULL,NULL,NULL };
++
++
++const EVP_CIPHER *EVP_des_ede3_wrap(void)
++	{
++	return &des3_wrap;
++	}
++
++# endif
+ #endif
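A detail shared by des_ede3_wrap_cipher() and aes_wrap_cipher() above: with EVP_CIPH_FLAG_CUSTOM_CIPHER, a call with out == NULL is a length query (inl + 16 here; inl + 8 or inl - 8 for the AES variants). A hedged sketch of the resulting calling pattern, assuming <openssl/evp.h> and a ctx already initialized for encryption with EVP_des_ede3_wrap() and the wrap-allow flag, as in the AES sketch above:

/* Query the output size first, then allocate and wrap; in must be
 * a multiple of 8 bytes (see the des_ede3_wrap_cipher sanity check). */
static unsigned char *wrap_3des(EVP_CIPHER_CTX *ctx, const unsigned char *in,
		unsigned int inl, int *outl)
	{
	unsigned char *out;
	int need = EVP_Cipher(ctx, NULL, in, inl);	/* inl + 16 */
	if (need <= 0 || (out = OPENSSL_malloc(need)) == NULL)
		return NULL;
	*outl = EVP_Cipher(ctx, out, in, inl);
	if (*outl <= 0)
		{
		OPENSSL_free(out);
		return NULL;
		}
	return out;
	}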
+diff -up openssl-1.0.1e/crypto/evp/evp_enc.c.wrap openssl-1.0.1e/crypto/evp/evp_enc.c
+--- openssl-1.0.1e/crypto/evp/evp_enc.c.wrap	2014-09-09 16:11:24.104379372 +0200
++++ openssl-1.0.1e/crypto/evp/evp_enc.c	2014-09-09 16:12:25.854801627 +0200
+@@ -233,7 +233,8 @@ int EVP_CipherInit_ex(EVP_CIPHER_CTX *ct
+ 			ctx->cipher_data = NULL;
+ 			}
+ 		ctx->key_len = cipher->key_len;
+-		ctx->flags = 0;
++		/* Preserve wrap enable flag, zero everything else */
++		ctx->flags &= EVP_CIPHER_CTX_FLAG_WRAP_ALLOW;
+ 		if(ctx->cipher->flags & EVP_CIPH_CTRL_INIT)
+ 			{
+ 			if(!EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_INIT, 0, NULL))
+@@ -256,6 +257,13 @@ skip_to_init:
+ 	    || ctx->cipher->block_size == 8
+ 	    || ctx->cipher->block_size == 16);
+ 
++	if(!(ctx->flags & EVP_CIPHER_CTX_FLAG_WRAP_ALLOW)
++		&& EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_WRAP_MODE)
++		{
++		EVPerr(EVP_F_EVP_CIPHERINIT_EX, EVP_R_WRAP_MODE_NOT_ALLOWED);
++		return 0;
++		}
++
+ 	if(!(EVP_CIPHER_CTX_flags(ctx) & EVP_CIPH_CUSTOM_IV)) {
+ 		switch(EVP_CIPHER_CTX_mode(ctx)) {
+ 
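The effect of the new check, sketched below (illustrative only, assuming the error strings are loaded as in the evp_test.c change further down): initializing a wrap-mode cipher on a context that has not opted in now fails up front instead of risking an output-buffer overflow in an unaware caller.

#include <stdio.h>
#include <openssl/err.h>
#include <openssl/evp.h>

/* Expected to fail: no EVP_CIPHER_CTX_FLAG_WRAP_ALLOW was set. */
static void wrap_requires_optin(const unsigned char kek[16])
	{
	EVP_CIPHER_CTX ctx;
	EVP_CIPHER_CTX_init(&ctx);
	if (!EVP_EncryptInit_ex(&ctx, EVP_aes_128_wrap(), NULL, kek, NULL))
		ERR_print_errors_fp(stderr);	/* "wrap mode not allowed" */
	EVP_CIPHER_CTX_cleanup(&ctx);
	}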
+diff -up openssl-1.0.1e/crypto/evp/evp_err.c.wrap openssl-1.0.1e/crypto/evp/evp_err.c
+--- openssl-1.0.1e/crypto/evp/evp_err.c.wrap	2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/crypto/evp/evp_err.c	2014-09-09 16:12:25.854801627 +0200
+@@ -1,6 +1,6 @@
+ /* crypto/evp/evp_err.c */
+ /* ====================================================================
+- * Copyright (c) 1999-2011 The OpenSSL Project.  All rights reserved.
++ * Copyright (c) 1999-2013 The OpenSSL Project.  All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+@@ -220,6 +220,7 @@ static ERR_STRING_DATA EVP_str_reasons[]
+ {ERR_REASON(EVP_R_UNSUPPORTED_PRF)       ,"unsupported prf"},
+ {ERR_REASON(EVP_R_UNSUPPORTED_PRIVATE_KEY_ALGORITHM),"unsupported private key algorithm"},
+ {ERR_REASON(EVP_R_UNSUPPORTED_SALT_TYPE) ,"unsupported salt type"},
++{ERR_REASON(EVP_R_WRAP_MODE_NOT_ALLOWED) ,"wrap mode not allowed"},
+ {ERR_REASON(EVP_R_WRONG_FINAL_BLOCK_LENGTH),"wrong final block length"},
+ {ERR_REASON(EVP_R_WRONG_PUBLIC_KEY_TYPE) ,"wrong public key type"},
+ {0,NULL}
+diff -up openssl-1.0.1e/crypto/evp/evp.h.wrap openssl-1.0.1e/crypto/evp/evp.h
+--- openssl-1.0.1e/crypto/evp/evp.h.wrap	2014-09-09 16:11:24.104379372 +0200
++++ openssl-1.0.1e/crypto/evp/evp.h	2014-09-09 16:12:25.855801651 +0200
+@@ -336,6 +336,7 @@ struct evp_cipher_st
+ #define		EVP_CIPH_GCM_MODE		0x6
+ #define		EVP_CIPH_CCM_MODE		0x7
+ #define		EVP_CIPH_XTS_MODE		0x10001
++#define		EVP_CIPH_WRAP_MODE		0x10002
+ #define 	EVP_CIPH_MODE			0xF0007
+ /* Set if variable length cipher */
+ #define 	EVP_CIPH_VARIABLE_LENGTH	0x8
+@@ -367,6 +368,13 @@ struct evp_cipher_st
+ #define 	EVP_CIPH_FLAG_CUSTOM_CIPHER	0x100000
+ #define		EVP_CIPH_FLAG_AEAD_CIPHER	0x200000
+ 
++/* Cipher context flag to indicate that we can handle
++ * wrap mode: if wrap mode were allowed in older applications
++ * it could overflow buffers.
++ */
++
++#define 	EVP_CIPHER_CTX_FLAG_WRAP_ALLOW	0x1
++
+ /* ctrl() values */
+ 
+ #define		EVP_CTRL_INIT			0x0
+@@ -729,6 +737,7 @@ const EVP_CIPHER *EVP_des_cbc(void);
+ const EVP_CIPHER *EVP_des_ede_cbc(void);
+ const EVP_CIPHER *EVP_des_ede3_cbc(void);
+ const EVP_CIPHER *EVP_desx_cbc(void);
++const EVP_CIPHER *EVP_des_ede3_wrap(void);
+ /* This should now be supported through the dev_crypto ENGINE. But also, why are
+  * rc4 and md5 declarations made here inside a "NO_DES" precompiler branch? */
+ #if 0
+@@ -788,6 +797,8 @@ const EVP_CIPHER *EVP_aes_128_ctr(void);
+ const EVP_CIPHER *EVP_aes_128_ccm(void);
+ const EVP_CIPHER *EVP_aes_128_gcm(void);
+ const EVP_CIPHER *EVP_aes_128_xts(void);
++const EVP_CIPHER *EVP_aes_128_wrap(void);
++const EVP_CIPHER *EVP_aes_128_wrap_pad(void);
+ const EVP_CIPHER *EVP_aes_192_ecb(void);
+ const EVP_CIPHER *EVP_aes_192_cbc(void);
+ const EVP_CIPHER *EVP_aes_192_cfb1(void);
+@@ -798,6 +809,8 @@ const EVP_CIPHER *EVP_aes_192_ofb(void);
+ const EVP_CIPHER *EVP_aes_192_ctr(void);
+ const EVP_CIPHER *EVP_aes_192_ccm(void);
+ const EVP_CIPHER *EVP_aes_192_gcm(void);
++const EVP_CIPHER *EVP_aes_192_wrap(void);
++const EVP_CIPHER *EVP_aes_192_wrap_pad(void);
+ const EVP_CIPHER *EVP_aes_256_ecb(void);
+ const EVP_CIPHER *EVP_aes_256_cbc(void);
+ const EVP_CIPHER *EVP_aes_256_cfb1(void);
+@@ -809,6 +822,8 @@ const EVP_CIPHER *EVP_aes_256_ctr(void);
+ const EVP_CIPHER *EVP_aes_256_ccm(void);
+ const EVP_CIPHER *EVP_aes_256_gcm(void);
+ const EVP_CIPHER *EVP_aes_256_xts(void);
++const EVP_CIPHER *EVP_aes_256_wrap(void);
++const EVP_CIPHER *EVP_aes_256_wrap_pad(void);
+ #if !defined(OPENSSL_NO_SHA) && !defined(OPENSSL_NO_SHA1)
+ const EVP_CIPHER *EVP_aes_128_cbc_hmac_sha1(void);
+ const EVP_CIPHER *EVP_aes_256_cbc_hmac_sha1(void);
+@@ -1397,6 +1412,7 @@ void ERR_load_EVP_strings(void);
+ #define EVP_R_UNSUPPORTED_PRF				 125
+ #define EVP_R_UNSUPPORTED_PRIVATE_KEY_ALGORITHM		 118
+ #define EVP_R_UNSUPPORTED_SALT_TYPE			 126
++#define EVP_R_WRAP_MODE_NOT_ALLOWED			 170
+ #define EVP_R_WRONG_FINAL_BLOCK_LENGTH			 109
+ #define EVP_R_WRONG_PUBLIC_KEY_TYPE			 110
+ 
+diff -up openssl-1.0.1e/crypto/evp/evp_lib.c.wrap openssl-1.0.1e/crypto/evp/evp_lib.c
+--- openssl-1.0.1e/crypto/evp/evp_lib.c.wrap	2014-09-09 16:11:24.104379372 +0200
++++ openssl-1.0.1e/crypto/evp/evp_lib.c	2014-09-09 16:12:25.855801651 +0200
+@@ -68,7 +68,15 @@ int EVP_CIPHER_param_to_asn1(EVP_CIPHER_
+ 	if (c->cipher->set_asn1_parameters != NULL)
+ 		ret=c->cipher->set_asn1_parameters(c,type);
+ 	else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1)
+-		ret=EVP_CIPHER_set_asn1_iv(c, type);
++		{
++		if (EVP_CIPHER_CTX_mode(c) == EVP_CIPH_WRAP_MODE)
++			{
++			ASN1_TYPE_set(type, V_ASN1_NULL, NULL);
++			ret = 1;
++			}
++		else
++			ret=EVP_CIPHER_set_asn1_iv(c, type);
++		}
+ 	else
+ 		ret=-1;
+ 	return(ret);
+@@ -81,7 +89,11 @@ int EVP_CIPHER_asn1_to_param(EVP_CIPHER_
+ 	if (c->cipher->get_asn1_parameters != NULL)
+ 		ret=c->cipher->get_asn1_parameters(c,type);
+ 	else if (c->cipher->flags & EVP_CIPH_FLAG_DEFAULT_ASN1)
++		{
++		if (EVP_CIPHER_CTX_mode(c) == EVP_CIPH_WRAP_MODE)
++			return 1;
+ 		ret=EVP_CIPHER_get_asn1_iv(c, type);
++		}
+ 	else
+ 		ret=-1;
+ 	return(ret);
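In other words, a wrap-mode cipher now serializes its AlgorithmIdentifier parameters as an explicit ASN.1 NULL (there is no IV to encode) and accepts the same on read. A quick illustrative check, assuming c is a wrap context initialized as in the earlier sketches:

#include <assert.h>
#include <openssl/asn1.h>
#include <openssl/evp.h>

static void check_wrap_params(EVP_CIPHER_CTX *c)
	{
	ASN1_TYPE *t = ASN1_TYPE_new();
	if (t != NULL && EVP_CIPHER_param_to_asn1(c, t) > 0)
		assert(t->type == V_ASN1_NULL);	/* wrap mode: NULL params */
	ASN1_TYPE_free(t);
	}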
+diff -up openssl-1.0.1e/crypto/evp/evp_test.c.wrap openssl-1.0.1e/crypto/evp/evp_test.c
+--- openssl-1.0.1e/crypto/evp/evp_test.c.wrap	2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/crypto/evp/evp_test.c	2014-09-09 16:12:25.856801673 +0200
+@@ -141,7 +141,7 @@ static void test1(const EVP_CIPHER *c,co
+     {
+     EVP_CIPHER_CTX ctx;
+     unsigned char out[4096];
+-    int outl,outl2;
++    int outl,outl2,mode;
+ 
+     printf("Testing cipher %s%s\n",EVP_CIPHER_name(c),
+ 	   (encdec == 1 ? "(encrypt)" : (encdec == 0 ? "(decrypt)" : "(encrypt/decrypt)")));
+@@ -151,6 +151,7 @@ static void test1(const EVP_CIPHER *c,co
+     hexdump(stdout,"Plaintext",plaintext,pn);
+     hexdump(stdout,"Ciphertext",ciphertext,cn);
+     
++    mode = EVP_CIPHER_mode(c); 
+     if(kn != c->key_len)
+ 	{
+ 	fprintf(stderr,"Key length doesn't match, got %d expected %lu\n",kn,
+@@ -158,9 +159,19 @@ static void test1(const EVP_CIPHER *c,co
+ 	test1_exit(5);
+ 	}
+     EVP_CIPHER_CTX_init(&ctx);
++    EVP_CIPHER_CTX_set_flags(&ctx,EVP_CIPHER_CTX_FLAG_WRAP_ALLOW);
+     if (encdec != 0)
+         {
+-	if(!EVP_EncryptInit_ex(&ctx,c,NULL,key,iv))
++	if (mode == EVP_CIPH_WRAP_MODE)
++	    {
++	    if(!EVP_EncryptInit_ex(&ctx,c,NULL,key,in ? iv : NULL))
++	        {
++		fprintf(stderr,"EncryptInit failed\n");
++		ERR_print_errors_fp(stderr);
++		test1_exit(10);
++		}
++	    }
++	else if(!EVP_EncryptInit_ex(&ctx,c,NULL,key,iv))
+ 	    {
+ 	    fprintf(stderr,"EncryptInit failed\n");
+ 	    ERR_print_errors_fp(stderr);
+@@ -199,7 +210,16 @@ static void test1(const EVP_CIPHER *c,co
+ 
+     if (encdec <= 0)
+         {
+-	if(!EVP_DecryptInit_ex(&ctx,c,NULL,key,iv))
++	if (mode == EVP_CIPH_WRAP_MODE)
++	    {
++	    if(!EVP_DecryptInit_ex(&ctx,c,NULL,key,in ? iv : NULL))
++	        {
++		fprintf(stderr,"EncryptInit failed\n");
++		ERR_print_errors_fp(stderr);
++		test1_exit(10);
++		}
++	    }
++	else if(!EVP_DecryptInit_ex(&ctx,c,NULL,key,iv))
+ 	    {
+ 	    fprintf(stderr,"DecryptInit failed\n");
+ 	    ERR_print_errors_fp(stderr);
+@@ -339,7 +359,7 @@ int main(int argc,char **argv)
+ 	perror(szTestFile);
+ 	EXIT(2);
+ 	}
+-
++    ERR_load_crypto_strings();
+     /* Load up the software EVP_CIPHER and EVP_MD definitions */
+     OpenSSL_add_all_ciphers();
+     OpenSSL_add_all_digests();
+diff -up openssl-1.0.1e/crypto/evp/evptests.txt.wrap openssl-1.0.1e/crypto/evp/evptests.txt
+--- openssl-1.0.1e/crypto/evp/evptests.txt.wrap	2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/crypto/evp/evptests.txt	2014-09-09 16:12:25.856801673 +0200
+@@ -332,3 +332,15 @@ SEED-ECB:0000000000000000000000000000000
+ SEED-ECB:000102030405060708090A0B0C0D0E0F::00000000000000000000000000000000:C11F22F20140505084483597E4370F43:1
+ SEED-ECB:4706480851E61BE85D74BFB3FD956185::83A2F8A288641FB9A4E9A5CC2F131C7D:EE54D13EBCAE706D226BC3142CD40D4A:1
+ SEED-ECB:28DBC3BC49FFD87DCFA509B11D422BE7::B41E6BE2EBA84A148E2EED84593C5EC7:9B9B7BFCD1813CB95D0B3618F40F5122:1
++
++# AES wrap tests from RFC3394
++id-aes128-wrap:000102030405060708090A0B0C0D0E0F::00112233445566778899AABBCCDDEEFF:1FA68B0A8112B447AEF34BD8FB5A7B829D3E862371D2CFE5
++id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF:96778B25AE6CA435F92B5B97C050AED2468AB8A17AD84E5D
++id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF:64E8C3F9CE0F5BA263E9777905818A2A93C8191E7D6E8AE7
++id-aes192-wrap:000102030405060708090A0B0C0D0E0F1011121314151617::00112233445566778899AABBCCDDEEFF0001020304050607:031D33264E15D33268F24EC260743EDCE1C6C7DDEE725A936BA814915C6762D2
++id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF0001020304050607:A8F9BC1612C68B3FF6E6F4FBE30E71E4769C8B80A32CB8958CD5D17D6B254DA1
++id-aes256-wrap:000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F::00112233445566778899AABBCCDDEEFF000102030405060708090A0B0C0D0E0F:28C9F404C4B810F4CBCCB35CFB87F8263F5786E2D80ED326CBC7F0E71A99F43BFB988B9B7A02DD21
++# AES wrap tests from RFC5649
++id-aes192-wrap-pad:5840df6e29b02af1ab493b705bf16ea1ae8338f4dcc176a8::c37b7e6492584340bed12207808941155068f738:138bdeaa9b8fa7fc61f97742e72248ee5ae6ae5360d1ae6a5f54f373fa543b6a
++id-aes192-wrap-pad:5840df6e29b02af1ab493b705bf16ea1ae8338f4dcc176a8::466f7250617369:afbeb0f07dfbf5419200f2ccb50bb24f
++
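The first RFC 3394 vector above can be cross-checked directly against the low-level API added in crypto/modes (a standalone sketch; the constants are transcribed from the test line):

#include <assert.h>
#include <string.h>
#include <openssl/aes.h>
#include <openssl/modes.h>

/* Re-check the first RFC 3394 KAT with CRYPTO_128_wrap(). */
static void kat_aes128_wrap(void)
	{
	static const unsigned char kek[16] = {
		0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
		0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F };
	static const unsigned char pt[16] = {
		0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
		0x88,0x99,0xAA,0xBB,0xCC,0xDD,0xEE,0xFF };
	static const unsigned char ct[24] = {
		0x1F,0xA6,0x8B,0x0A,0x81,0x12,0xB4,0x47,
		0xAE,0xF3,0x4B,0xD8,0xFB,0x5A,0x7B,0x82,
		0x9D,0x3E,0x86,0x23,0x71,0xD2,0xCF,0xE5 };
	unsigned char out[24];
	AES_KEY ks;

	assert(AES_set_encrypt_key(kek, 128, &ks) == 0);
	/* NULL iv selects the default A6A6A6A6A6A6A6A6 IV from RFC 3394. */
	assert(CRYPTO_128_wrap(&ks, NULL, out, pt, sizeof(pt),
			       (block128_f)AES_encrypt) == sizeof(ct));
	assert(memcmp(out, ct, sizeof(ct)) == 0);
	}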
+diff -up openssl-1.0.1e/crypto/modes/Makefile.wrap openssl-1.0.1e/crypto/modes/Makefile
+--- openssl-1.0.1e/crypto/modes/Makefile.wrap	2014-09-09 16:11:24.079378796 +0200
++++ openssl-1.0.1e/crypto/modes/Makefile	2014-09-09 16:12:25.856801673 +0200
+@@ -22,9 +22,9 @@ APPS=
+ 
+ LIB=$(TOP)/libcrypto.a
+ LIBSRC= cbc128.c ctr128.c cts128.c cfb128.c ofb128.c gcm128.c \
+-	ccm128.c xts128.c
++	ccm128.c xts128.c wrap128.c
+ LIBOBJ= cbc128.o ctr128.o cts128.o cfb128.o ofb128.o gcm128.o \
+-	ccm128.o xts128.o $(MODES_ASM_OBJ)
++	ccm128.o xts128.o wrap128.o $(MODES_ASM_OBJ)
+ 
+ SRC= $(LIBSRC)
+ 
+diff -up openssl-1.0.1e/crypto/modes/modes.h.wrap openssl-1.0.1e/crypto/modes/modes.h
+--- openssl-1.0.1e/crypto/modes/modes.h.wrap	2014-09-09 16:11:23.726370665 +0200
++++ openssl-1.0.1e/crypto/modes/modes.h	2014-09-09 16:12:25.857801695 +0200
+@@ -133,3 +133,17 @@ typedef struct xts128_context XTS128_CON
+ 
+ int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16],
+ 	const unsigned char *inp, unsigned char *out, size_t len, int enc);
++
++size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
++		unsigned char *out,
++		const unsigned char *in, size_t inlen, block128_f block);
++
++size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
++		unsigned char *out,
++		const unsigned char *in, size_t inlen, block128_f block);
++size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv,
++		unsigned char *out,
++		const unsigned char *in, size_t inlen, block128_f block);
++size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv,
++		unsigned char *out,
++		const unsigned char *in, size_t inlen, block128_f block);
+diff -up openssl-1.0.1e/crypto/modes/wrap128.c.wrap openssl-1.0.1e/crypto/modes/wrap128.c
+--- openssl-1.0.1e/crypto/modes/wrap128.c.wrap	2014-09-09 16:12:25.857801695 +0200
++++ openssl-1.0.1e/crypto/modes/wrap128.c	2014-09-09 16:12:25.857801695 +0200
+@@ -0,0 +1,372 @@
++/* crypto/modes/wrap128.c */
++/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
++ * project.
++ * Mode with padding contributed by Petr Spacek (pspacek@redhat.com).
++ */
++/* ====================================================================
++ * Copyright (c) 2013 The OpenSSL Project.  All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above copyright
++ *    notice, this list of conditions and the following disclaimer. 
++ *
++ * 2. Redistributions in binary form must reproduce the above copyright
++ *    notice, this list of conditions and the following disclaimer in
++ *    the documentation and/or other materials provided with the
++ *    distribution.
++ *
++ * 3. All advertising materials mentioning features or use of this
++ *    software must display the following acknowledgment:
++ *    "This product includes software developed by the OpenSSL Project
++ *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
++ *
++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
++ *    endorse or promote products derived from this software without
++ *    prior written permission. For written permission, please contact
++ *    licensing@OpenSSL.org.
++ *
++ * 5. Products derived from this software may not be called "OpenSSL"
++ *    nor may "OpenSSL" appear in their names without prior written
++ *    permission of the OpenSSL Project.
++ *
++ * 6. Redistributions of any form whatsoever must retain the following
++ *    acknowledgment:
++ *    "This product includes software developed by the OpenSSL Project
++ *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
++ *
++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
++ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
++ * OF THE POSSIBILITY OF SUCH DAMAGE.
++ * ====================================================================
++ */
++
++/**  Beware!
++ *
++ *  The following wrapping modes were designed for AES, but this
++ *  implementation allows you to use them with any 128-bit block cipher.
++ */
++
++#include "cryptlib.h"
++#include <openssl/modes.h>
++
++/** RFC 3394 section 2.2.3.1 Default Initial Value */
++static const unsigned char default_iv[] = {
++  0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6,
++};
++
++/** RFC 5649 section 3 Alternative Initial Value 32-bit constant */
++static const unsigned char default_aiv[] = {
++  0xA6, 0x59, 0x59, 0xA6
++};
++
++/** Input size limit: lower than the maximum allowed by the standards but
++ *  far larger than anything that will be used in practice.
++ */
++#define CRYPTO128_WRAP_MAX (1UL << 31)
++
++/** Wrapping according to RFC 3394 section 2.2.1.
++ *
++ *  @param[in]  key    Key value. 
++ *  @param[in]  iv     IV value. Length = 8 bytes. NULL = use default_iv.
++ *  @param[in]  in     Plain text as n 64-bit blocks, n >= 2.
++ *  @param[in]  inlen  Length of in.
++ *  @param[out] out    Cipher text. Minimal buffer length = (inlen + 8) bytes.
++ *                     Input and output buffers can overlap if block function
++ *                     supports that.
++ *  @param[in]  block  Block processing function.
++ *  @return            0 if inlen does not consist of n 64-bit blocks (n >= 2)
++ *                     or if inlen > CRYPTO128_WRAP_MAX;
++ *                     output length if wrapping succeeded.
++ */
++size_t CRYPTO_128_wrap(void *key, const unsigned char *iv,
++		unsigned char *out,
++		const unsigned char *in, size_t inlen, block128_f block)
++	{
++	unsigned char *A, B[16], *R;
++	size_t i, j, t;
++	if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX))
++		return 0;
++	A = B;
++	t = 1;
++	memmove(out + 8, in, inlen);
++	if (!iv)
++		iv = default_iv;
++
++	memcpy(A, iv, 8);
++
++	for (j = 0; j < 6; j++)
++		{
++		R = out + 8;
++		for (i = 0; i < inlen; i += 8, t++, R += 8)
++			{
++			memcpy(B + 8, R, 8);
++			block(B, B, key);
++			A[7] ^= (unsigned char)(t & 0xff);
++			if (t > 0xff)	
++				{
++				A[6] ^= (unsigned char)((t >> 8) & 0xff);
++				A[5] ^= (unsigned char)((t >> 16) & 0xff);
++				A[4] ^= (unsigned char)((t >> 24) & 0xff);
++				}
++			memcpy(R, B + 8, 8);
++			}
++		}
++	memcpy(out, A, 8);
++	return inlen + 8;
++	}
++
++
++/** Unwrapping according to RFC 3394 section 2.2.2 steps 1-2.
++ *  IV check (step 3) is the responsibility of the caller.
++ *
++ *  @param[in]  key    Key value. 
++ *  @param[out] iv     Unchecked IV value. Minimal buffer length = 8 bytes.
++ *  @param[out] out    Plain text without IV.
++ *                     Minimal buffer length = (inlen - 8) bytes.
++ *                     Input and output buffers can overlap if block function
++ *                     supports that.
++ *  @param[in]  in     Ciphertext as n 64-bit blocks.
++ *  @param[in]  inlen  Length of in.
++ *  @param[in]  block  Block processing function.
++ *  @return            0 if inlen is out of range [24, CRYPTO128_WRAP_MAX]
++ *                     or if inlen is not a multiple of 8;
++ *                     output length otherwise.
++ */
++static size_t crypto_128_unwrap_raw(void *key, unsigned char *iv,
++		unsigned char *out, const unsigned char *in,
++		size_t inlen, block128_f block)
++	{
++	unsigned char *A, B[16], *R;
++	size_t i, j, t;
++	inlen -= 8;
++	if ((inlen & 0x7) || (inlen < 16) || (inlen > CRYPTO128_WRAP_MAX))
++		return 0;
++	A = B;
++	t =  6 * (inlen >> 3);
++	memcpy(A, in, 8);
++	memmove(out, in + 8, inlen);
++	for (j = 0; j < 6; j++)
++		{
++		R = out + inlen - 8;
++		for (i = 0; i < inlen; i += 8, t--, R -= 8)
++			{
++			A[7] ^= (unsigned char)(t & 0xff);
++			if (t > 0xff)	
++				{
++				A[6] ^= (unsigned char)((t >> 8) & 0xff);
++				A[5] ^= (unsigned char)((t >> 16) & 0xff);
++				A[4] ^= (unsigned char)((t >> 24) & 0xff);
++				}
++			memcpy(B + 8, R, 8);
++			block(B, B, key);
++			memcpy(R, B + 8, 8);
++			}
++		}
++	memcpy(iv, A, 8);
++	return inlen;
++	}
++
++/** Unwrapping according to RFC 3394 section 2.2.2 including the IV check.
++ *  The recovered IV has to match the supplied IV, otherwise an error is
++ *  returned.
++ *
++ *  @param[in]  key    Key value.
++ *  @param[in]  iv     Expected IV value. NULL = use default_iv.
++ *  @param[out] out    Plain text without IV.
++ *                     Minimal buffer length = (inlen - 8) bytes.
++ *                     Input and output buffers can overlap if block function
++ *                     supports that.
++ *  @param[in]  in     Ciphertext as n 64-bit blocks.
++ *  @param[in]  inlen  Length of in.
++ *  @param[in]  block  Block processing function.
++ *  @return            0 if inlen is out of range [24, CRYPTO128_WRAP_MAX],
++ *                     if inlen is not a multiple of 8,
++ *                     or if the IV doesn't match the expected value;
++ *                     output length otherwise.
++ */
++size_t CRYPTO_128_unwrap(void *key, const unsigned char *iv,
++		unsigned char *out, const unsigned char *in, size_t inlen,
++		block128_f block)
++	{
++	size_t ret;
++	unsigned char got_iv[8];
++
++	ret = crypto_128_unwrap_raw(key, got_iv, out, in, inlen, block);
++	if (ret == 0)
++		return 0;
++
++	if (!iv)
++		iv = default_iv;
++	if (CRYPTO_memcmp(got_iv, iv, 8))
++		{
++		OPENSSL_cleanse(out, ret);
++		return 0;
++		}
++	return ret;
++	}
++
++/** Wrapping according to RFC 5649 section 4.1.
++ *
++ *  @param[in]  key    Key value. 
++ *  @param[in]  icv    (Non-standard) IV, 4 bytes. NULL = use default_aiv.
++ *  @param[out] out    Cipher text. Minimal buffer length = (inlen + 15) bytes.
++ *                     Input and output buffers can overlap if block function
++ *                     supports that.
++ *  @param[in]  in     Plain text of arbitrary length.
++ *  @param[in]  inlen  Length of in.
++ *  @param[in]  block  Block processing function.
++ *  @return            0 if inlen is out of range [1, CRYPTO128_WRAP_MAX);
++ *                     output length if wrapping succeeded.
++ */
++size_t CRYPTO_128_wrap_pad(void *key, const unsigned char *icv,
++		unsigned char *out,
++		const unsigned char *in, size_t inlen, block128_f block)
++	{
++	/* n: number of 64-bit blocks in the padded key data */
++	const size_t blocks_padded = (inlen + 8) / 8;
++	const size_t padded_len = blocks_padded * 8;
++	const size_t padding_len = padded_len - inlen;
++	/* RFC 5649 section 3: Alternative Initial Value */
++	unsigned char aiv[8];
++	int ret;
++
++	/* Section 1: use 32-bit fixed field for plaintext octet length */
++	if (inlen == 0 || inlen >= CRYPTO128_WRAP_MAX)
++		return 0;
++
++	/* Section 3: Alternative Initial Value */
++	if (!icv)
++		memcpy(aiv, default_aiv, 4);
++	else
++		memcpy(aiv, icv, 4); /* Standard doesn't mention this. */
++
++	aiv[4] = (inlen >> 24) & 0xFF;
++	aiv[5] = (inlen >> 16) & 0xFF;
++	aiv[6] = (inlen >> 8) & 0xFF;
++	aiv[7] = inlen & 0xFF;
++
++	if (padded_len == 8)
++		{
++		/* Section 4.1 - special case in step 2:
++		 * If the padded plaintext contains exactly eight octets, then
++		 * prepend the AIV and encrypt the resulting 128-bit block
++		 * using AES in ECB mode. */
++		memmove(out + 8, in, inlen);
++		memcpy(out, aiv, 8);
++		memset(out + 8 + inlen, 0, padding_len);
++		block(out, out, key);
++		ret = 16; /* AIV + padded input */
++		}
++	else
++		{
++		memmove(out, in, inlen);
++		memset(out + inlen, 0, padding_len); /* Section 4.1 step 1 */
++		ret = CRYPTO_128_wrap(key, aiv, out, out, padded_len, block);
++		}
++
++	return ret;
++	}
++
++/** Unwrapping according to RFC 5649 section 4.2.
++ *
++ *  @param[in]  key    Key value. 
++ *  @param[in]  icv    (Non-standard) IV, 4 bytes. NULL = use default_aiv.
++ *  @param[out] out    Plain text. Minimal buffer length = inlen bytes.
++ *                     Input and output buffers can overlap if block function
++ *                     supports that.
++ *  @param[in]  in     Ciphertext as n 64-bit blocks.
++ *  @param[in]  inlen  Length of in.
++ *  @param[in]  block  Block processing function.
++ *  @return            0 if inlen is out of range [16, CRYPTO128_WRAP_MAX],
++ *                     if inlen is not a multiple of 8,
++ *                     or if the AIV and message length indicator don't match;
++ *                     output length if unwrapping succeeded and the AIV matches.
++ */
++size_t CRYPTO_128_unwrap_pad(void *key, const unsigned char *icv,
++		unsigned char *out,
++		const unsigned char *in, size_t inlen, block128_f block)
++	{
++	/* n: number of 64-bit blocks in the padded key data */
++	size_t n = inlen / 8 - 1;
++	size_t padded_len;
++	size_t padding_len;
++	size_t ptext_len;
++	/* RFC 5649 section 3: Alternative Initial Value */
++	unsigned char aiv[8];
++	static unsigned char zeros[8] = {0x0};
++	size_t ret;
++
++	/* Section 4.2: Cipher text length has to be (n+1) 64-bit blocks. */
++	if ((inlen & 0x7) != 0 || inlen < 16 || inlen >= CRYPTO128_WRAP_MAX)
++		return 0;
++
++	memmove(out, in, inlen);
++	if (inlen == 16)
++		{
++		/* Section 4.2 - special case in step 1:
++		 * When n=1, the ciphertext contains exactly two 64-bit
++		 * blocks and they are decrypted as a single AES
++		 * block using AES in ECB mode:
++		 * AIV | P[1] = DEC(K, C[0] | C[1])
++		 */
++		block(out, out, key);
++		memcpy(aiv, out, 8);
++		/* Remove AIV */
++		memmove(out, out + 8, 8);
++		padded_len = 8;
++		}
++	else
++		{
++		padded_len = inlen - 8;
++		ret = crypto_128_unwrap_raw(key, aiv, out, out, inlen, block);
++		if (padded_len != ret)
++			{
++			OPENSSL_cleanse(out, inlen);
++			return 0;
++			}
++		}
++
++	/* Section 3: AIV checks: Check that MSB(32,A) = A65959A6.
++	 * Optionally a user-supplied value can be used instead
++	 * (even though the standard doesn't mention this). */
++	if ((!icv && CRYPTO_memcmp(aiv, default_aiv, 4))
++		|| (icv && CRYPTO_memcmp(aiv, icv, 4)))
++		{
++		OPENSSL_cleanse(out, inlen);
++		return 0;
++		}
++
++	/* Check that 8*(n-1) < LSB(32,AIV) <= 8*n.
++	 * If so, let ptext_len = LSB(32,AIV). */
++
++	ptext_len = (aiv[4] << 24) | (aiv[5] << 16) | (aiv[6] << 8) | aiv[7];
++	if (8*(n-1) >= ptext_len || ptext_len > 8*n)
++		{
++		OPENSSL_cleanse(out, inlen);
++		return 0;
++		}
++
++	/* Check that the rightmost padding_len octets of the output data
++	 * are zero. */
++	padding_len = padded_len - ptext_len;
++	if (CRYPTO_memcmp(out + ptext_len, zeros, padding_len) != 0)
++		{
++		OPENSSL_cleanse(out, inlen);
++		return 0;
++		}
++
++	/* Section 4.2 step 3: Remove padding */
++	return ptext_len;
++	}
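Putting the padded variant together, a round-trip sketch against the second RFC 5649 vector from evptests.txt above (7-byte input, so it exercises the single-block ECB special case in both directions); constants are transcribed from that test line:

#include <assert.h>
#include <string.h>
#include <openssl/aes.h>
#include <openssl/modes.h>

/* Round-trip the 7-byte RFC 5649 vector through the padded API. */
static void kat_aes192_wrap_pad(void)
	{
	static const unsigned char kek[24] = {
		0x58,0x40,0xdf,0x6e,0x29,0xb0,0x2a,0xf1,
		0xab,0x49,0x3b,0x70,0x5b,0xf1,0x6e,0xa1,
		0xae,0x83,0x38,0xf4,0xdc,0xc1,0x76,0xa8 };
	static const unsigned char pt[7] = {
		0x46,0x6f,0x72,0x50,0x61,0x73,0x69 };
	static const unsigned char ct[16] = {
		0xaf,0xbe,0xb0,0xf0,0x7d,0xfb,0xf5,0x41,
		0x92,0x00,0xf2,0xcc,0xb5,0x0b,0xb2,0x4f };
	unsigned char wrapped[16], unwrapped[16];
	AES_KEY eks, dks;

	assert(AES_set_encrypt_key(kek, 192, &eks) == 0);
	assert(AES_set_decrypt_key(kek, 192, &dks) == 0);
	assert(CRYPTO_128_wrap_pad(&eks, NULL, wrapped, pt, sizeof(pt),
				   (block128_f)AES_encrypt) == sizeof(ct));
	assert(memcmp(wrapped, ct, sizeof(ct)) == 0);
	assert(CRYPTO_128_unwrap_pad(&dks, NULL, unwrapped, wrapped, sizeof(ct),
				     (block128_f)AES_decrypt) == sizeof(pt));
	assert(memcmp(unwrapped, pt, sizeof(pt)) == 0);
	}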
diff --git a/SOURCES/openssl-1.0.1e-fallback-scsv.patch b/SOURCES/openssl-1.0.1e-fallback-scsv.patch
index 0e28c00..0c307c3 100644
--- a/SOURCES/openssl-1.0.1e-fallback-scsv.patch
+++ b/SOURCES/openssl-1.0.1e-fallback-scsv.patch
@@ -78,8 +78,8 @@ diff -up openssl-1.0.1e/doc/ssl/SSL_CTX_set_mode.pod.fallback-scsv openssl-1.0.1
  
  =head1 RETURN VALUES
 diff -up openssl-1.0.1e/ssl/dtls1.h.fallback-scsv openssl-1.0.1e/ssl/dtls1.h
---- openssl-1.0.1e/ssl/dtls1.h.fallback-scsv	2014-10-15 14:39:30.862907615 +0200
-+++ openssl-1.0.1e/ssl/dtls1.h	2014-10-15 14:39:30.973910121 +0200
+--- openssl-1.0.1e/ssl/dtls1.h.fallback-scsv	2014-10-15 14:45:25.492913542 +0200
++++ openssl-1.0.1e/ssl/dtls1.h	2014-10-15 14:45:25.596915890 +0200
 @@ -84,6 +84,8 @@ extern "C" {
  #endif
  
@@ -95,8 +95,8 @@ diff -up openssl-1.0.1e/ssl/dtls1.h.fallback-scsv openssl-1.0.1e/ssl/dtls1.h
  #endif
 -
 diff -up openssl-1.0.1e/ssl/d1_lib.c.fallback-scsv openssl-1.0.1e/ssl/d1_lib.c
---- openssl-1.0.1e/ssl/d1_lib.c.fallback-scsv	2014-10-15 14:39:30.911908721 +0200
-+++ openssl-1.0.1e/ssl/d1_lib.c	2014-10-15 14:39:30.973910121 +0200
+--- openssl-1.0.1e/ssl/d1_lib.c.fallback-scsv	2014-10-15 14:45:25.539914603 +0200
++++ openssl-1.0.1e/ssl/d1_lib.c	2014-10-15 14:45:25.596915890 +0200
 @@ -263,6 +263,16 @@ long dtls1_ctrl(SSL *s, int cmd, long la
  	case DTLS_CTRL_LISTEN:
  		ret = dtls1_listen(s, parg);
@@ -116,7 +116,7 @@ diff -up openssl-1.0.1e/ssl/d1_lib.c.fallback-scsv openssl-1.0.1e/ssl/d1_lib.c
  		ret = ssl3_ctrl(s, cmd, larg, parg);
 diff -up openssl-1.0.1e/ssl/ssl_err.c.fallback-scsv openssl-1.0.1e/ssl/ssl_err.c
 --- openssl-1.0.1e/ssl/ssl_err.c.fallback-scsv	2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/ssl/ssl_err.c	2014-10-15 14:39:30.973910121 +0200
++++ openssl-1.0.1e/ssl/ssl_err.c	2014-10-15 14:45:25.596915890 +0200
 @@ -382,6 +382,7 @@ static ERR_STRING_DATA SSL_str_reasons[]
  {ERR_REASON(SSL_R_HTTPS_PROXY_REQUEST)   ,"https proxy request"},
  {ERR_REASON(SSL_R_HTTP_REQUEST)          ,"http request"},
@@ -134,8 +134,8 @@ diff -up openssl-1.0.1e/ssl/ssl_err.c.fallback-scsv openssl-1.0.1e/ssl/ssl_err.c
  {ERR_REASON(SSL_R_TLSV1_ALERT_INTERNAL_ERROR),"tlsv1 alert internal error"},
  {ERR_REASON(SSL_R_TLSV1_ALERT_NO_RENEGOTIATION),"tlsv1 alert no renegotiation"},
 diff -up openssl-1.0.1e/ssl/ssl.h.fallback-scsv openssl-1.0.1e/ssl/ssl.h
---- openssl-1.0.1e/ssl/ssl.h.fallback-scsv	2014-10-15 14:39:30.940909375 +0200
-+++ openssl-1.0.1e/ssl/ssl.h	2014-10-15 14:41:46.174962343 +0200
+--- openssl-1.0.1e/ssl/ssl.h.fallback-scsv	2014-10-15 14:45:25.588915709 +0200
++++ openssl-1.0.1e/ssl/ssl.h	2014-10-15 14:47:04.423146935 +0200
 @@ -638,6 +638,10 @@ struct ssl_session_st
   * TLS only.)  "Released" buffers are put onto a free-list in the context
   * or just freed (depending on the context's setting for freelist_max_len). */
@@ -155,8 +155,8 @@ diff -up openssl-1.0.1e/ssl/ssl.h.fallback-scsv openssl-1.0.1e/ssl/ssl.h
  
  #define SSL_ERROR_NONE			0
  #define SSL_ERROR_SSL			1
-@@ -1565,6 +1570,8 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
- 
+@@ -1566,6 +1571,8 @@ DECLARE_PEM_rw(SSL_SESSION, SSL_SESSION)
+ #define SSL_CTRL_SET_ECDH_AUTO			94
  #define SSL_CTRL_GET_SERVER_TMP_KEY		109
  
 +#define SSL_CTRL_CHECK_PROTO_VERSION		119
@@ -164,7 +164,7 @@ diff -up openssl-1.0.1e/ssl/ssl.h.fallback-scsv openssl-1.0.1e/ssl/ssl.h
  #define DTLSv1_get_timeout(ssl, arg) \
  	SSL_ctrl(ssl,DTLS_CTRL_GET_TIMEOUT,0, (void *)arg)
  #define DTLSv1_handle_timeout(ssl) \
-@@ -2298,6 +2305,7 @@ void ERR_load_SSL_strings(void);
+@@ -2304,6 +2311,7 @@ void ERR_load_SSL_strings(void);
  #define SSL_R_HTTPS_PROXY_REQUEST			 155
  #define SSL_R_HTTP_REQUEST				 156
  #define SSL_R_ILLEGAL_PADDING				 283
@@ -172,7 +172,7 @@ diff -up openssl-1.0.1e/ssl/ssl.h.fallback-scsv openssl-1.0.1e/ssl/ssl.h
  #define SSL_R_INCONSISTENT_COMPRESSION			 340
  #define SSL_R_INVALID_CHALLENGE_LENGTH			 158
  #define SSL_R_INVALID_COMMAND				 280
-@@ -2444,6 +2452,7 @@ void ERR_load_SSL_strings(void);
+@@ -2450,6 +2458,7 @@ void ERR_load_SSL_strings(void);
  #define SSL_R_TLSV1_ALERT_DECRYPTION_FAILED		 1021
  #define SSL_R_TLSV1_ALERT_DECRYPT_ERROR			 1051
  #define SSL_R_TLSV1_ALERT_EXPORT_RESTRICTION		 1060
@@ -181,8 +181,8 @@ diff -up openssl-1.0.1e/ssl/ssl.h.fallback-scsv openssl-1.0.1e/ssl/ssl.h
  #define SSL_R_TLSV1_ALERT_INTERNAL_ERROR		 1080
  #define SSL_R_TLSV1_ALERT_NO_RENEGOTIATION		 1100
 diff -up openssl-1.0.1e/ssl/ssl_lib.c.fallback-scsv openssl-1.0.1e/ssl/ssl_lib.c
---- openssl-1.0.1e/ssl/ssl_lib.c.fallback-scsv	2014-10-15 14:39:30.912908743 +0200
-+++ openssl-1.0.1e/ssl/ssl_lib.c	2014-10-15 14:39:30.975910166 +0200
+--- openssl-1.0.1e/ssl/ssl_lib.c.fallback-scsv	2014-10-15 14:45:25.589915731 +0200
++++ openssl-1.0.1e/ssl/ssl_lib.c	2014-10-15 14:45:25.597915912 +0200
 @@ -1383,6 +1383,8 @@ int ssl_cipher_list_to_bytes(SSL *s,STAC
  
  	if (sk == NULL) return(0);
@@ -289,8 +289,8 @@ diff -up openssl-1.0.1e/ssl/ssl_lib.c.fallback-scsv openssl-1.0.1e/ssl/ssl_lib.c
  		p+=n;
  		if (c != NULL)
 diff -up openssl-1.0.1e/ssl/ssl3.h.fallback-scsv openssl-1.0.1e/ssl/ssl3.h
---- openssl-1.0.1e/ssl/ssl3.h.fallback-scsv	2014-10-15 14:39:30.949909579 +0200
-+++ openssl-1.0.1e/ssl/ssl3.h	2014-10-15 14:39:30.975910166 +0200
+--- openssl-1.0.1e/ssl/ssl3.h.fallback-scsv	2014-10-15 14:45:25.570915303 +0200
++++ openssl-1.0.1e/ssl/ssl3.h	2014-10-15 14:45:25.598915935 +0200
 @@ -128,9 +128,14 @@
  extern "C" {
  #endif
@@ -308,8 +308,8 @@ diff -up openssl-1.0.1e/ssl/ssl3.h.fallback-scsv openssl-1.0.1e/ssl/ssl3.h
  #define SSL3_CK_RSA_NULL_SHA			0x03000002
  #define SSL3_CK_RSA_RC4_40_MD5 			0x03000003
 diff -up openssl-1.0.1e/ssl/s2_lib.c.fallback-scsv openssl-1.0.1e/ssl/s2_lib.c
---- openssl-1.0.1e/ssl/s2_lib.c.fallback-scsv	2014-10-15 14:39:30.901908495 +0200
-+++ openssl-1.0.1e/ssl/s2_lib.c	2014-10-15 14:39:30.975910166 +0200
+--- openssl-1.0.1e/ssl/s2_lib.c.fallback-scsv	2014-10-15 14:45:25.526914309 +0200
++++ openssl-1.0.1e/ssl/s2_lib.c	2014-10-15 14:45:25.598915935 +0200
 @@ -391,6 +391,8 @@ long ssl2_ctrl(SSL *s, int cmd, long lar
  	case SSL_CTRL_GET_SESSION_REUSED:
  		ret=s->hit;
@@ -330,7 +330,7 @@ diff -up openssl-1.0.1e/ssl/s2_lib.c.fallback-scsv openssl-1.0.1e/ssl/s2_lib.c
  		p[2]=((unsigned char)(l     ))&0xFF;
 diff -up openssl-1.0.1e/ssl/s23_clnt.c.fallback-scsv openssl-1.0.1e/ssl/s23_clnt.c
 --- openssl-1.0.1e/ssl/s23_clnt.c.fallback-scsv	2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/ssl/s23_clnt.c	2014-10-15 14:39:30.975910166 +0200
++++ openssl-1.0.1e/ssl/s23_clnt.c	2014-10-15 14:45:25.598915935 +0200
 @@ -715,6 +715,9 @@ static int ssl23_get_server_hello(SSL *s
  			goto err;
  			}
@@ -342,8 +342,8 @@ diff -up openssl-1.0.1e/ssl/s23_clnt.c.fallback-scsv openssl-1.0.1e/ssl/s23_clnt
  			{
  			/* fatal alert */
 diff -up openssl-1.0.1e/ssl/s23_srvr.c.fallback-scsv openssl-1.0.1e/ssl/s23_srvr.c
---- openssl-1.0.1e/ssl/s23_srvr.c.fallback-scsv	2014-10-15 14:39:30.966909962 +0200
-+++ openssl-1.0.1e/ssl/s23_srvr.c	2014-10-15 14:39:30.976910188 +0200
+--- openssl-1.0.1e/ssl/s23_srvr.c.fallback-scsv	2014-10-15 14:45:25.584915619 +0200
++++ openssl-1.0.1e/ssl/s23_srvr.c	2014-10-15 14:45:25.598915935 +0200
 @@ -421,6 +421,9 @@ int ssl23_get_client_hello(SSL *s)
  			}
  		}
@@ -356,7 +356,7 @@ diff -up openssl-1.0.1e/ssl/s23_srvr.c.fallback-scsv openssl-1.0.1e/ssl/s23_srvr
  		{
 diff -up openssl-1.0.1e/ssl/s3_enc.c.fallback-scsv openssl-1.0.1e/ssl/s3_enc.c
 --- openssl-1.0.1e/ssl/s3_enc.c.fallback-scsv	2013-02-11 16:26:04.000000000 +0100
-+++ openssl-1.0.1e/ssl/s3_enc.c	2014-10-15 14:39:30.976910188 +0200
++++ openssl-1.0.1e/ssl/s3_enc.c	2014-10-15 14:45:25.598915935 +0200
 @@ -892,7 +892,7 @@ int ssl3_alert_code(int code)
  	case SSL_AD_BAD_CERTIFICATE_STATUS_RESPONSE: return(SSL3_AD_HANDSHAKE_FAILURE);
  	case SSL_AD_BAD_CERTIFICATE_HASH_VALUE: return(SSL3_AD_HANDSHAKE_FAILURE);
@@ -367,9 +367,9 @@ diff -up openssl-1.0.1e/ssl/s3_enc.c.fallback-scsv openssl-1.0.1e/ssl/s3_enc.c
  	}
 -
 diff -up openssl-1.0.1e/ssl/s3_lib.c.fallback-scsv openssl-1.0.1e/ssl/s3_lib.c
---- openssl-1.0.1e/ssl/s3_lib.c.fallback-scsv	2014-10-15 14:39:30.941909398 +0200
-+++ openssl-1.0.1e/ssl/s3_lib.c	2014-10-15 14:39:30.976910188 +0200
-@@ -3388,6 +3388,33 @@ long ssl3_ctrl(SSL *s, int cmd, long lar
+--- openssl-1.0.1e/ssl/s3_lib.c.fallback-scsv	2014-10-15 14:45:25.590915754 +0200
++++ openssl-1.0.1e/ssl/s3_lib.c	2014-10-15 14:45:25.599915957 +0200
+@@ -3394,6 +3394,33 @@ long ssl3_ctrl(SSL *s, int cmd, long lar
  			EVP_PKEY_free(ptmp);
  			return 0;
  			}
@@ -403,7 +403,7 @@ diff -up openssl-1.0.1e/ssl/s3_lib.c.fallback-scsv openssl-1.0.1e/ssl/s3_lib.c
  	default:
  		break;
  		}
-@@ -3747,6 +3774,7 @@ long ssl3_ctx_callback_ctrl(SSL_CTX *ctx
+@@ -3759,6 +3786,7 @@ long ssl3_ctx_callback_ctrl(SSL_CTX *ctx
  		break;
  #endif
  #endif
@@ -411,14 +411,14 @@ diff -up openssl-1.0.1e/ssl/s3_lib.c.fallback-scsv openssl-1.0.1e/ssl/s3_lib.c
  	default:
  		return(0);
  		}
-@@ -4317,4 +4345,3 @@ long ssl_get_algorithm2(SSL *s)
+@@ -4337,4 +4365,3 @@ long ssl_get_algorithm2(SSL *s)
  		return SSL_HANDSHAKE_MAC_SHA256 | TLS1_PRF_SHA256;
  	return alg2;
  	}
 -		
 diff -up openssl-1.0.1e/ssl/tls1.h.fallback-scsv openssl-1.0.1e/ssl/tls1.h
---- openssl-1.0.1e/ssl/tls1.h.fallback-scsv	2014-10-15 14:39:30.775905650 +0200
-+++ openssl-1.0.1e/ssl/tls1.h	2014-10-15 14:39:30.976910188 +0200
+--- openssl-1.0.1e/ssl/tls1.h.fallback-scsv	2014-10-15 14:45:25.382911058 +0200
++++ openssl-1.0.1e/ssl/tls1.h	2014-10-15 14:45:25.599915957 +0200
 @@ -159,17 +159,19 @@ extern "C" {
  
  #define TLS1_ALLOW_EXPERIMENTAL_CIPHERSUITES	0
@@ -454,8 +454,8 @@ diff -up openssl-1.0.1e/ssl/tls1.h.fallback-scsv openssl-1.0.1e/ssl/tls1.h
  #define TLS1_AD_NO_RENEGOTIATION	100
  /* codes 110-114 are from RFC3546 */
 diff -up openssl-1.0.1e/ssl/t1_enc.c.fallback-scsv openssl-1.0.1e/ssl/t1_enc.c
---- openssl-1.0.1e/ssl/t1_enc.c.fallback-scsv	2014-10-15 14:39:30.936909285 +0200
-+++ openssl-1.0.1e/ssl/t1_enc.c	2014-10-15 14:39:30.977910211 +0200
+--- openssl-1.0.1e/ssl/t1_enc.c.fallback-scsv	2014-10-15 14:45:25.557915009 +0200
++++ openssl-1.0.1e/ssl/t1_enc.c	2014-10-15 14:45:25.599915957 +0200
 @@ -1265,6 +1265,7 @@ int tls1_alert_code(int code)
  	case SSL_AD_BAD_CERTIFICATE_STATUS_RESPONSE: return(TLS1_AD_BAD_CERTIFICATE_STATUS_RESPONSE);
  	case SSL_AD_BAD_CERTIFICATE_HASH_VALUE: return(TLS1_AD_BAD_CERTIFICATE_HASH_VALUE);
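For orientation, the client-side contract behind these hunks, as a hedged sketch: SSL_MODE_SEND_FALLBACK_SCSV is the mode documented by the SSL_CTX_set_mode.pod hunk above, ctx_fallback is an assumed SSL_CTX capped at the lower protocol version, and the mode is set only on a downgrade retry so that an up-to-date server can refuse it with the inappropriate-fallback alert.

#include <stdio.h>
#include <openssl/err.h>
#include <openssl/ssl.h>

/* Use on the fallback retry only, never on the first connection attempt. */
static SSL *connect_with_fallback(SSL_CTX *ctx_fallback, int fd)
	{
	SSL *ssl = SSL_new(ctx_fallback);
	if (ssl == NULL)
		return NULL;
	/* Signal the downgrade so a current server can reject it. */
	SSL_set_mode(ssl, SSL_MODE_SEND_FALLBACK_SCSV);
	SSL_set_fd(ssl, fd);
	if (SSL_connect(ssl) <= 0)
		{
		ERR_print_errors_fp(stderr);	/* "tlsv1 alert inappropriate fallback" */
		SSL_free(ssl);
		return NULL;
		}
	return ssl;
	}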
diff --git a/SOURCES/openssl-1.0.1e-fips-ec.patch b/SOURCES/openssl-1.0.1e-fips-ec.patch
index 7287dae..e1f648c 100644
--- a/SOURCES/openssl-1.0.1e-fips-ec.patch
+++ b/SOURCES/openssl-1.0.1e-fips-ec.patch
@@ -241,7 +241,7 @@ diff -up openssl-1.0.1e/crypto/ec/ec_key.c.fips-ec openssl-1.0.1e/crypto/ec/ec_k
 +
 +	EVP_PKEY_set1_EC_KEY(pk, key);
 +
-+	if (fips_pkey_signature_test(pk, tbs, 0, NULL, 0, NULL, 0, NULL))
++	if (fips_pkey_signature_test(pk, tbs, -1, NULL, 0, NULL, 0, NULL))
 +		ret = 1;
 +
 +	err:
diff --git a/SOURCES/openssl-1.0.1e-fips.patch b/SOURCES/openssl-1.0.1e-fips.patch
index f5496a0..d1a7e7f 100644
--- a/SOURCES/openssl-1.0.1e-fips.patch
+++ b/SOURCES/openssl-1.0.1e-fips.patch
@@ -1008,7 +1008,7 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips openssl-1.0.1e/crypto/dsa/dsa_
 +
 +	EVP_PKEY_set1_DSA(pk, dsa);
 +
-+	if (fips_pkey_signature_test(pk, tbs, 0, NULL, 0, NULL, 0, NULL))
++	if (fips_pkey_signature_test(pk, tbs, -1, NULL, 0, NULL, 0, NULL))
 +		ret = 1;
 +
 +	err:
@@ -8660,7 +8660,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_aes_selftest.c.fips openssl-1.0.1e/cryp
 diff -up openssl-1.0.1e/crypto/fips/fips.c.fips openssl-1.0.1e/crypto/fips/fips.c
 --- openssl-1.0.1e/crypto/fips/fips.c.fips	2013-10-04 11:48:04.182694181 +0200
 +++ openssl-1.0.1e/crypto/fips/fips.c	2013-10-04 11:48:04.182694181 +0200
-@@ -0,0 +1,489 @@
+@@ -0,0 +1,491 @@
 +/* ====================================================================
 + * Copyright (c) 2003 The OpenSSL Project.  All rights reserved.
 + *
@@ -8990,6 +8990,8 @@ diff -up openssl-1.0.1e/crypto/fips/fips.c.fips openssl-1.0.1e/crypto/fips/fips.
 +		}
 +		free(buf);
 +		free(hex);
++	} else {
++		rv = -1;
 +	}
 +
 +end:
@@ -18135,7 +18137,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_sha_selftest.c.fips openssl-1.0.1e/cryp
 diff -up openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c
 --- openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips	2013-10-04 11:48:04.188694316 +0200
 +++ openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c	2013-10-04 11:48:04.188694316 +0200
-@@ -0,0 +1,180 @@
+@@ -0,0 +1,236 @@
 +/* ====================================================================
 + * Copyright (c) 2003 The OpenSSL Project.  All rights reserved.
 + *
@@ -18195,17 +18197,73 @@ diff -up openssl-1.0.1e/crypto/fips/fips_standalone_hmac.c.fips openssl-1.0.1e/c
 +#ifndef FIPSCANISTER_O
 +int FIPS_selftest_failed() { return 0; }
 +void FIPS_selftest_check() {}
-+void OPENSSL_cleanse(void *p,size_t len) {}
 +#endif
 +
++#ifdef OPENSSL_FIPS
++int bn_mul_mont_fpu64(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { return 0; }
++int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) { return 0; }
++
 +#if	defined(__i386)   || defined(__i386__)   || defined(_M_IX86) || \
 +	defined(__INTEL__) || \
 +	defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
 +
 +unsigned int  OPENSSL_ia32cap_P[2];
++unsigned long *OPENSSL_ia32cap_loc(void)
++{   if (sizeof(long)==4)
++	/*
++	 * If 32-bit application pulls address of OPENSSL_ia32cap_P[0]
++	 * clear second element to maintain the illusion that vector
++	 * is 32-bit.
++	 */
++	OPENSSL_ia32cap_P[1]=0;
++    return (unsigned long *)OPENSSL_ia32cap_P;
++}
++
++#if defined(OPENSSL_CPUID_OBJ) && !defined(OPENSSL_NO_ASM) && !defined(I386_ONLY)
++#define OPENSSL_CPUID_SETUP
++#if defined(_WIN32)
++typedef unsigned __int64 IA32CAP;
++#else
++typedef unsigned long long IA32CAP;
++#endif
++void OPENSSL_cpuid_setup(void)
++{ static int trigger=0;
++  IA32CAP OPENSSL_ia32_cpuid(void);
++  IA32CAP vec;
++  char *env;
++
++    if (trigger)	return;
++
++    trigger=1;
++    if ((env=getenv("OPENSSL_ia32cap"))) {
++	int off = (env[0]=='~')?1:0;
++#if defined(_WIN32)
++	if (!sscanf(env+off,"%I64i",&vec)) vec = strtoul(env+off,NULL,0);
++#else
++	if (!sscanf(env+off,"%lli",(long long *)&vec)) vec = strtoul(env+off,NULL,0);
++#endif
++	if (off) vec = OPENSSL_ia32_cpuid()&~vec;
++    }
++    else
++	vec = OPENSSL_ia32_cpuid();
++
++    /*
++     * |(1<<10) sets a reserved bit to signal that variable
++     * was initialized already... This is to avoid interference
++     * with cpuid snippets in ELF .init segment.
++     */
++    OPENSSL_ia32cap_P[0] = (unsigned int)vec|(1<<10);
++    OPENSSL_ia32cap_P[1] = (unsigned int)(vec>>32);
++}
 +#endif
 +
-+#ifdef OPENSSL_FIPS
++#else
++unsigned long *OPENSSL_ia32cap_loc(void) { return NULL; }
++#endif
++int OPENSSL_NONPIC_relocated = 0;
++#if !defined(OPENSSL_CPUID_SETUP) && !defined(OPENSSL_CPUID_OBJ)
++void OPENSSL_cpuid_setup(void) {}
++#endif
 +
 +static void hmac_init(SHA256_CTX *md_ctx,SHA256_CTX *o_ctx,
 +		      const char *key)
@@ -18911,7 +18969,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_test_suite.c.fips openssl-1.0.1e/crypto
 diff -up openssl-1.0.1e/crypto/fips/Makefile.fips openssl-1.0.1e/crypto/fips/Makefile
 --- openssl-1.0.1e/crypto/fips/Makefile.fips	2013-10-04 11:48:04.189694339 +0200
 +++ openssl-1.0.1e/crypto/fips/Makefile	2013-10-04 11:48:04.189694339 +0200
-@@ -0,0 +1,340 @@
+@@ -0,0 +1,341 @@
 +#
 +# OpenSSL/crypto/fips/Makefile
 +#
@@ -19004,6 +19062,7 @@ diff -up openssl-1.0.1e/crypto/fips/Makefile.fips openssl-1.0.1e/crypto/fips/Mak
 +
 +$(EXE): $(PROGRAM).o
 +	FIPS_SHA_ASM=""; for i in $(SHA1_ASM_OBJ) sha256.o; do FIPS_SHA_ASM="$$FIPS_SHA_ASM ../sha/$$i" ; done; \
++	for i in $(CPUID_OBJ); do FIPS_SHA_ASM="$$FIPS_SHA_ASM ../$$i" ; done; \
 +	$(CC) -o $@ $(CFLAGS) $(PROGRAM).o $$FIPS_SHA_ASM
 +
 +# DO NOT DELETE THIS LINE -- make depend depends on it.
diff --git a/SOURCES/openssl-1.0.1e-new-fips-reqs.patch b/SOURCES/openssl-1.0.1e-new-fips-reqs.patch
index 055a087..40527ef 100644
--- a/SOURCES/openssl-1.0.1e-new-fips-reqs.patch
+++ b/SOURCES/openssl-1.0.1e-new-fips-reqs.patch
@@ -36,7 +36,7 @@ diff -up openssl-1.0.1e/crypto/dh/dh.h.fips-reqs openssl-1.0.1e/crypto/dh/dh.h
  #endif
  
  #define OPENSSL_DH_FIPS_MIN_MODULUS_BITS 1024
-+#define OPENSSL_DH_FIPS_MIN_MODULUS_BITS_GEN 2048
++#define OPENSSL_DH_FIPS_MIN_MODULUS_BITS_GEN (getenv("OPENSSL_ENFORCE_MODULUS_BITS")?2048:1024)
  
  #define DH_FLAG_CACHE_MONT_P     0x01
  #define DH_FLAG_NO_EXP_CONSTTIME 0x02 /* new with 0.9.7h; the built-in DH
@@ -80,11 +80,12 @@ diff -up openssl-1.0.1e/crypto/dh/dh_check.c.fips-reqs openssl-1.0.1e/crypto/dh/
 diff -up openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips-reqs openssl-1.0.1e/crypto/dsa/dsa_gen.c
 --- openssl-1.0.1e/crypto/dsa/dsa_gen.c.fips-reqs	2013-12-18 12:17:09.749636636 +0100
 +++ openssl-1.0.1e/crypto/dsa/dsa_gen.c	2013-12-18 12:17:09.799637708 +0100
-@@ -159,7 +159,6 @@ int dsa_builtin_paramgen(DSA *ret, size_
+@@ -159,7 +159,7 @@ int dsa_builtin_paramgen(DSA *ret, size_
  	    }
  
  	if (FIPS_module_mode() &&
 -	    (bits != 1024 || qbits != 160) &&
++	    (getenv("OPENSSL_ENFORCE_MODULUS_BITS") || bits != 1024 || qbits != 160) &&
  	    (bits != 2048 || qbits != 224) &&
  	    (bits != 2048 || qbits != 256) &&
  	    (bits != 3072 || qbits != 256))
@@ -95,7 +96,7 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa.h.fips-reqs openssl-1.0.1e/crypto/dsa/dsa
  #endif
  
  #define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS 1024
-+#define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS_GEN 2048
++#define OPENSSL_DSA_FIPS_MIN_MODULUS_BITS_GEN (getenv("OPENSSL_ENFORCE_MODULUS_BITS")?2048:1024)
  
  #define DSA_FLAG_CACHE_MONT_P	0x01
  #define DSA_FLAG_NO_EXP_CONSTTIME       0x02 /* new with 0.9.7h; the built-in DSA
@@ -124,6 +125,42 @@ diff -up openssl-1.0.1e/crypto/dsa/dsa_key.c.fips-reqs openssl-1.0.1e/crypto/dsa
  		{
  		DSAerr(DSA_F_DSA_BUILTIN_KEYGEN, DSA_R_KEY_SIZE_TOO_SMALL);
  		goto err;
+diff -up openssl-1.0.1e/crypto/fips/fips.c.fips-reqs openssl-1.0.1e/crypto/fips/fips.c
+--- openssl-1.0.1e/crypto/fips/fips.c.fips-reqs	2014-09-24 16:38:43.000000000 +0200
++++ openssl-1.0.1e/crypto/fips/fips.c	2014-09-24 16:37:28.000000000 +0200
+@@ -427,27 +427,25 @@ int FIPS_module_mode_set(int onoff, cons
+ 	    ret = 0;
+ 	    goto end;
+ 	    }
+-	OPENSSL_ia32cap_P[0] |= (1<<28);	/* set "shared cache"	*/
+-	OPENSSL_ia32cap_P[1] &= ~(1<<(60-32));	/* clear AVX		*/
+ 	}
+ #endif
+ 
+-	if(!verify_checksums())
++	if(!FIPS_selftest())
+ 	    {
+-	    FIPSerr(FIPS_F_FIPS_MODULE_MODE_SET,FIPS_R_FINGERPRINT_DOES_NOT_MATCH);
+ 	    fips_selftest_fail = 1;
+ 	    ret = 0;
+ 	    goto end;
+ 	    }
+ 
+-	if(FIPS_selftest())
+-	    fips_set_mode(onoff);
+-	else
++	if(!verify_checksums())
+ 	    {
++	    FIPSerr(FIPS_F_FIPS_MODULE_MODE_SET,FIPS_R_FINGERPRINT_DOES_NOT_MATCH);
+ 	    fips_selftest_fail = 1;
+ 	    ret = 0;
+ 	    goto end;
+ 	    }
++
++	fips_set_mode(onoff);
+ 	ret = 1;
+ 	goto end;
+ 	}
 diff -up openssl-1.0.1e/crypto/fips/fips_dh_selftest.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_dh_selftest.c
 --- openssl-1.0.1e/crypto/fips/fips_dh_selftest.c.fips-reqs	2013-12-18 17:06:36.575114314 +0100
 +++ openssl-1.0.1e/crypto/fips/fips_dh_selftest.c	2013-12-18 17:26:14.409036334 +0100
@@ -397,29 +434,598 @@ diff -up openssl-1.0.1e/crypto/fips/fips_post.c.fips-reqs openssl-1.0.1e/crypto/
  		rv = 0;
  	return rv;
 diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c
---- openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs	2013-12-18 12:17:09.761636893 +0100
-+++ openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c	2013-12-18 12:17:09.799637708 +0100
-@@ -340,6 +340,42 @@ static const unsigned char kat_RSA_X931_
-   0x60, 0x83, 0x18, 0x88, 0xA3, 0xF5, 0x59, 0xC3
+--- openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs	2014-03-14 14:47:18.809259727 +0100
++++ openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c	2014-03-14 15:37:26.295687852 +0100
+@@ -60,69 +60,113 @@
+ #ifdef OPENSSL_FIPS
+ 
+ static const unsigned char n[] =
+-"\x00\xBB\xF8\x2F\x09\x06\x82\xCE\x9C\x23\x38\xAC\x2B\x9D\xA8\x71"
+-"\xF7\x36\x8D\x07\xEE\xD4\x10\x43\xA4\x40\xD6\xB6\xF0\x74\x54\xF5"
+-"\x1F\xB8\xDF\xBA\xAF\x03\x5C\x02\xAB\x61\xEA\x48\xCE\xEB\x6F\xCD"
+-"\x48\x76\xED\x52\x0D\x60\xE1\xEC\x46\x19\x71\x9D\x8A\x5B\x8B\x80"
+-"\x7F\xAF\xB8\xE0\xA3\xDF\xC7\x37\x72\x3E\xE6\xB4\xB7\xD9\x3A\x25"
+-"\x84\xEE\x6A\x64\x9D\x06\x09\x53\x74\x88\x34\xB2\x45\x45\x98\x39"
+-"\x4E\xE0\xAA\xB1\x2D\x7B\x61\xA5\x1F\x52\x7A\x9A\x41\xF6\xC1\x68"
+-"\x7F\xE2\x53\x72\x98\xCA\x2A\x8F\x59\x46\xF8\xE5\xFD\x09\x1D\xBD"
+-"\xCB";
++"\x00\xc9\xd5\x6d\x9d\x90\xdb\x43\xd6\x02\xed\x96\x88\x13\x8a"
++"\xb2\xbf\x6e\xa1\x06\x10\xb2\x78\x37\xa7\x14\xa8\xff\xdd\x00"
++"\xdd\xb4\x93\xa0\x45\xcc\x96\x90\xed\xad\xa9\xdd\xc4\xd6\xca"
++"\x0c\xf0\xed\x4f\x72\x5e\x21\x49\x9a\x18\x12\x15\x8f\x90\x5a"
++"\xdb\xb6\x33\x99\xa3\xe6\xb4\xf0\xc4\x97\x21\x26\xbb\xe3\xba"
++"\xf2\xff\xa0\x72\xda\x89\x63\x8e\x8b\x3e\x08\x9d\x92\x2a\xbe"
++"\x16\xe1\x43\x15\xfc\x57\xc7\x1f\x09\x11\x67\x1c\xa9\x96\xd1"
++"\x8b\x3e\x80\x93\xc1\x59\xd0\x6d\x39\xf2\xac\x95\xcc\x10\x75"
++"\xe9\x31\x24\xd1\x43\xaf\x68\x52\x4b\xe7\x16\xd7\x49\x65\x6f"
++"\x26\xc0\x86\xad\xc0\x07\x0a\xc1\xe1\x2f\x87\x85\x86\x3b\xdc"
++"\x5a\x99\xbe\xe9\xf9\xb9\xe9\x82\x27\x51\x04\x15\xab\x06\x0e"
++"\x76\x5a\x28\x8d\x92\xbd\xc5\xb5\x7b\xa8\xdf\x4e\x47\xa2\xc1"
++"\xe7\x52\xbf\x47\xf7\x62\xe0\x3a\x6f\x4d\x6a\x4d\x4e\xd4\xb9"
++"\x59\x69\xfa\xb2\x14\xc1\xee\xe6\x2f\x95\xcd\x94\x72\xae\xe4"
++"\xdb\x18\x9a\xc4\xcd\x70\xbd\xee\x31\x16\xb7\x49\x65\xac\x40"
++"\x19\x0e\xb5\x6d\x83\xf1\x36\xbb\x08\x2f\x2e\x4e\x92\x62\xa4"
++"\xff\x50\xdb\x20\x45\xa2\xeb\x16\x7a\xf2\xd5\x28\xc1\xfd\x4e"
++"\x03\x71";
++
+ 
+ static int corrupt_rsa;
+ 
+ static int setrsakey(RSA *key)
+     {
+-    static const unsigned char e[] = "\x11";
++    static const unsigned char e[] = "\x01\x00\x01";
+ 
+     static const unsigned char d[] =
+-"\x00\xA5\xDA\xFC\x53\x41\xFA\xF2\x89\xC4\xB9\x88\xDB\x30\xC1\xCD"
+-"\xF8\x3F\x31\x25\x1E\x06\x68\xB4\x27\x84\x81\x38\x01\x57\x96\x41"
+-"\xB2\x94\x10\xB3\xC7\x99\x8D\x6B\xC4\x65\x74\x5E\x5C\x39\x26\x69"
+-"\xD6\x87\x0D\xA2\xC0\x82\xA9\x39\xE3\x7F\xDC\xB8\x2E\xC9\x3E\xDA"
+-"\xC9\x7F\xF3\xAD\x59\x50\xAC\xCF\xBC\x11\x1C\x76\xF1\xA9\x52\x94"
+-"\x44\xE5\x6A\xAF\x68\xC5\x6C\x09\x2C\xD3\x8D\xC3\xBE\xF5\xD2\x0A"
+-"\x93\x99\x26\xED\x4F\x74\xA1\x3E\xDD\xFB\xE1\xA1\xCE\xCC\x48\x94"
+-"\xAF\x94\x28\xC2\xB7\xB8\x88\x3F\xE4\x46\x3A\x4B\xC8\x5B\x1C\xB3"
+-"\xC1";
++"\x36\x27\x3d\xb1\xf9\x1b\xdb\xa7\xa0\x41\x7f\x12\x23\xac\x23"
++"\x29\x99\xd5\x3a\x7b\x60\x67\x41\x07\x63\x53\xb4\xd2\xe7\x58"
++"\x95\x0a\xc7\x05\xf3\x4e\xb2\xb4\x12\xd4\x70\xdc\x4f\x85\x06"
++"\xd3\xdd\xd8\x63\x27\x3e\x67\x31\x21\x24\x39\x04\xbc\x06\xa4"
++"\xcc\xce\x2b\x7a\xfe\x7b\xad\xde\x11\x6e\xa3\xa5\xe6\x04\x53"
++"\x0e\xa3\x4e\x2d\xb4\x8f\x31\xbf\xca\x75\x25\x52\x02\x85\xde"
++"\x3d\xb2\x72\x43\xb2\x89\x8a\x9a\x34\x41\x26\x3f\x9a\x67\xbe"
++"\xa4\x96\x7b\x0e\x75\xba\xa6\x93\xd5\xb8\xd8\xb8\x57\xf2\x4b"
++"\x0f\x14\x81\xd1\x57\x4e\xf6\x45\x4c\xa6\x3b\xd0\x70\xca\xd3"
++"\x9d\x55\xde\x22\x05\xe7\x8e\x28\x4d\xee\x11\xcf\xb6\x67\x76"
++"\x09\xd3\xe3\x3c\x13\xf9\x99\x34\x10\x7b\xec\x81\x38\xf0\xb6"
++"\x34\x9c\x9b\x50\x6f\x0b\x91\x81\x4d\x89\x94\x04\x7b\xf0\x3c"
++"\xf4\xb1\xb2\x00\x48\x8d\x5a\x8f\x88\x9e\xc5\xab\x3a\x9e\x44"
++"\x3f\x54\xe7\xd9\x6e\x47\xaa\xa1\xbd\x40\x46\x31\xf9\xf0\x34"
++"\xb6\x04\xe1\x2b\x5b\x73\x86\xdd\x3a\x92\x1b\x71\xc7\x3f\x32"
++"\xe5\xc3\xc2\xab\xa1\x7e\xbf\xa4\x52\xa0\xb0\x68\x90\xd1\x20"
++"\x12\x79\xe9\xd7\xc9\x40\xba\xf2\x19\xc7\xa5\x00\x92\x86\x0d"
++"\x01";
+ 
+     static const unsigned char p[] =
+-"\x00\xEE\xCF\xAE\x81\xB1\xB9\xB3\xC9\x08\x81\x0B\x10\xA1\xB5\x60"
+-"\x01\x99\xEB\x9F\x44\xAE\xF4\xFD\xA4\x93\xB8\x1A\x9E\x3D\x84\xF6"
+-"\x32\x12\x4E\xF0\x23\x6E\x5D\x1E\x3B\x7E\x28\xFA\xE7\xAA\x04\x0A"
+-"\x2D\x5B\x25\x21\x76\x45\x9D\x1F\x39\x75\x41\xBA\x2A\x58\xFB\x65"
+-"\x99";
++"\x00\xfc\x5c\x6e\x16\xce\x1f\x03\x7b\xcd\xf7\xb3\x72\xb2\x8f"
++"\x16\x72\xb8\x56\xae\xf7\xcd\x67\xd8\x4e\x7d\x07\xaf\xd5\x43"
++"\x26\xc3\x35\xbe\x43\x8f\x4e\x2f\x1c\x43\x4e\x6b\xd2\xb2\xec"
++"\x52\x6d\x97\x52\x2b\xcc\x5c\x3a\x6b\xf4\x14\xc6\x74\xda\x66"
++"\x38\x1c\x7a\x3f\x84\x2f\xe3\xf9\x5a\xb8\x65\x69\x46\x06\xa3"
++"\x37\x79\xb2\xa1\x5b\x58\xed\x5e\xa7\x5f\x8c\x65\x66\xbb\xd1"
++"\x24\x36\xe6\x37\xa7\x3d\x49\x77\x8a\x8c\x34\xd8\x69\x29\xf3"
++"\x4d\x58\x22\xb0\x51\x24\xb6\x40\xa8\x86\x59\x0a\xb7\xba\x5c"
++"\x97\xda\x57\xe8\x36\xda\x7a\x9c\xad";
+ 
+     static const unsigned char q[] =
+-"\x00\xC9\x7F\xB1\xF0\x27\xF4\x53\xF6\x34\x12\x33\xEA\xAA\xD1\xD9"
+-"\x35\x3F\x6C\x42\xD0\x88\x66\xB1\xD0\x5A\x0F\x20\x35\x02\x8B\x9D"
+-"\x86\x98\x40\xB4\x16\x66\xB4\x2E\x92\xEA\x0D\xA3\xB4\x32\x04\xB5"
+-"\xCF\xCE\x33\x52\x52\x4D\x04\x16\xA5\xA4\x41\xE7\x00\xAF\x46\x15"
+-"\x03";
++"\x00\xcc\xbe\x7b\x09\x69\x06\xee\x45\xbf\x88\x47\x38\xa8\xf8"
++"\x17\xe5\xb6\xba\x67\x55\xe3\xe8\x05\x8b\xb8\xe2\x53\xd6\x8e"
++"\xef\x2c\xe7\x4f\x4a\xf7\x4e\x26\x8d\x85\x0b\x3f\xec\xc3\x1c"
++"\xd4\xeb\xec\x6a\xc8\x72\x2a\x25\x7d\xfd\xa6\x77\x96\xf0\x1e"
++"\xcd\x28\x57\xf8\x37\x30\x75\x6b\xbd\xd4\x7b\x0c\x87\xc5\x6c"
++"\x87\x40\xa5\xbb\x27\x2c\x78\xc9\x74\x5a\x54\x5b\x0b\x30\x6f"
++"\x44\x4a\xfa\x71\xe4\x21\x61\x66\xf9\xee\x65\xde\x7c\x04\xd7"
++"\xfd\xa9\x15\x5b\x7f\xe2\x7a\xba\x69\x86\x72\xa6\x06\x8d\x9b"
++"\x90\x55\x60\x9e\x4c\x5d\xa9\xb6\x55";
++
+ 
+     static const unsigned char dmp1[] =
+-"\x54\x49\x4C\xA6\x3E\xBA\x03\x37\xE4\xE2\x40\x23\xFC\xD6\x9A\x5A"
+-"\xEB\x07\xDD\xDC\x01\x83\xA4\xD0\xAC\x9B\x54\xB0\x51\xF2\xB1\x3E"
+-"\xD9\x49\x09\x75\xEA\xB7\x74\x14\xFF\x59\xC1\xF7\x69\x2E\x9A\x2E"
+-"\x20\x2B\x38\xFC\x91\x0A\x47\x41\x74\xAD\xC9\x3C\x1F\x67\xC9\x81";
++"\x7a\xd6\x12\xd0\x0e\xec\x91\xa9\x85\x8b\xf8\x50\xf0\x11\x2e"
++"\x00\x11\x32\x40\x60\x66\x1f\x11\xee\xc2\x75\x27\x65\x4b\x16"
++"\x67\x16\x95\xd2\x14\xc3\x1d\xb3\x48\x1f\xb7\xe4\x0b\x2b\x74"
++"\xc3\xdb\x50\x27\xf9\x85\x3a\xfa\xa9\x08\x23\xc1\x65\x3d\x34"
++"\x3a\xc8\x56\x7a\x65\x45\x36\x6e\xae\x2a\xce\x9f\x43\x43\xd7"
++"\x10\xe9\x9e\x18\xf4\xa4\x35\xda\x8a\x6b\xb0\x3f\xdd\x53\xe3"
++"\xa8\xc5\x4e\x79\x9d\x1f\x51\x8c\xa2\xca\x66\x3c\x6a\x2a\xff"
++"\x8e\xd2\xf3\xb7\xcb\x82\xda\xde\x2c\xe6\xd2\x8c\xb3\xad\xb6"
++"\x4c\x95\x55\x76\xbd\xc9\xc8\xd1";
++
+ 
+     static const unsigned char dmq1[] =
+-"\x47\x1E\x02\x90\xFF\x0A\xF0\x75\x03\x51\xB7\xF8\x78\x86\x4C\xA9"
+-"\x61\xAD\xBD\x3A\x8A\x7E\x99\x1C\x5C\x05\x56\xA9\x4C\x31\x46\xA7"
+-"\xF9\x80\x3F\x8F\x6F\x8A\xE3\x42\xE9\x31\xFD\x8A\xE4\x7A\x22\x0D"
+-"\x1B\x99\xA4\x95\x84\x98\x07\xFE\x39\xF9\x24\x5A\x98\x36\xDA\x3D";
+-    
++"\x00\x83\x23\x1d\xbb\x11\x42\x17\x2b\x25\x5a\x2c\x03\xe6\x75"
++"\xc1\x18\xa8\xc9\x0b\x96\xbf\xba\xc4\x92\x91\x80\xa5\x22\x2f"
++"\xba\x91\x90\x36\x01\x56\x15\x00\x2c\x74\xa2\x97\xf7\x15\xa1"
++"\x49\xdf\x32\x35\xd2\xdd\x0c\x91\xa6\xf8\xe7\xbe\x81\x36\x9b"
++"\x03\xdc\x6b\x3b\xd8\x5d\x79\x57\xe0\xe6\x4f\x49\xdf\x4c\x5c"
++"\x0e\xe5\x21\x41\x95\xfd\xad\xff\x9a\x3e\xa0\xf9\x0f\x59\x9e"
++"\x6a\xa7\x7b\x71\xa7\x24\x9a\x36\x52\xae\x97\x20\xc1\x5e\x78"
++"\xd9\x47\x8b\x1e\x67\xf2\xaf\x98\xe6\x2d\xef\x10\xd7\xf1\xab"
++"\x49\xee\xe5\x4b\x7e\xae\x1f\x1d\x61";
++
++
+     static const unsigned char iqmp[] =
+-"\x00\xB0\x6C\x4F\xDA\xBB\x63\x01\x19\x8D\x26\x5B\xDB\xAE\x94\x23"
+-"\xB3\x80\xF2\x71\xF7\x34\x53\x88\x50\x93\x07\x7F\xCD\x39\xE2\x11"
+-"\x9F\xC9\x86\x32\x15\x4F\x58\x83\xB1\x67\xA9\x67\xBF\x40\x2B\x4E"
+-"\x9E\x2E\x0F\x96\x56\xE6\x98\xEA\x36\x66\xED\xFB\x25\x79\x80\x39"
+-"\xF7";
++"\x23\x96\xc1\x91\x17\x5e\x0a\x83\xd2\xdc\x7b\x69\xb2\x59\x1d"
++"\x33\x58\x52\x3f\x18\xc7\x09\x50\x1c\xb9\xa1\xbb\x4c\xa2\x38"
++"\x40\x4c\x9a\x8e\xfe\x9c\x90\x92\xd0\x71\x9f\x89\x99\x50\x91"
++"\x1f\x34\x8b\x74\x53\x11\x11\x4a\x70\xe2\xf7\x30\xd8\x8c\x80"
++"\xe1\xcc\x9f\xf1\x63\x17\x1a\x7d\x67\x29\x4c\xcb\x4e\x74\x7b"
++"\xe0\x3e\x9e\x2f\xf4\x67\x8f\xec\xb9\x5c\x00\x1e\x7e\xa2\x7b"
++"\x92\xc9\x6f\x4c\xe4\x0e\xf9\x48\x63\xcd\x50\x22\x5d\xbf\xb6"
++"\x9d\x01\x33\x6a\xf4\x50\xbe\x86\x98\x4f\xca\x3f\x3a\xfa\xcf"
++"\x07\x40\xc4\xaa\xad\xae\xbe\xbf";
+ 
+     key->n = BN_bin2bn(n, sizeof(n)-1, key->n);
+     if (corrupt_rsa)
+-	BN_set_bit(key->n, 1024);
++	BN_set_bit(key->n, 2048);
+     key->e = BN_bin2bn(e, sizeof(e)-1, key->e);
+     key->d = BN_bin2bn(d, sizeof(d)-1, key->d);
+     key->p = BN_bin2bn(p, sizeof(p)-1, key->p);
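
The KAT key in this hunk grows from 1024 to 2048 bits, so the deliberate-corruption
hook moves with it: flipping bit 2048, one past the top bit of the new modulus, is
enough to make every known-answer check fail. A minimal sketch of the idea, using
only BN calls that already appear above (the sizeof(x)-1 idiom drops the NUL that
terminates each string literal); corrupt_modulus_sketch is an illustrative name,
not part of the patch:

    #include <openssl/bn.h>

    /* Setting bit BN_num_bits(n), i.e. one past the most significant bit,
     * grows the 2048-bit modulus to 2049 bits, so n != p*q afterwards and
     * all KAT signatures/verifications against the stored vectors fail. */
    static int corrupt_modulus_sketch(BIGNUM *n)
    {
        if (BN_num_bits(n) != 2048)     /* assumes the new 2048-bit KAT key */
            return 0;
        return BN_set_bit(n, 2048);     /* was BN_set_bit(n, 1024) */
    }
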
+@@ -145,201 +189,291 @@ void FIPS_corrupt_rsa()
+ static const unsigned char kat_tbs[] = "OpenSSL FIPS 140-2 Public Key RSA KAT";
+ 
+ static const unsigned char kat_RSA_PSS_SHA1[] = {
+-  0x2D, 0xAF, 0x6E, 0xC2, 0x98, 0xFB, 0x8A, 0xA1, 0xB9, 0x46, 0xDA, 0x0F,
+-  0x01, 0x1E, 0x37, 0x93, 0xC2, 0x55, 0x27, 0xE4, 0x1D, 0xD2, 0x90, 0xBB,
+-  0xF4, 0xBF, 0x4A, 0x74, 0x39, 0x51, 0xBB, 0xE8, 0x0C, 0xB7, 0xF8, 0xD3,
+-  0xD1, 0xDF, 0xE7, 0xBE, 0x80, 0x05, 0xC3, 0xB5, 0xC7, 0x83, 0xD5, 0x4C,
+-  0x7F, 0x49, 0xFB, 0x3F, 0x29, 0x9B, 0xE1, 0x12, 0x51, 0x60, 0xD0, 0xA7,
+-  0x0D, 0xA9, 0x28, 0x56, 0x73, 0xD9, 0x07, 0xE3, 0x5E, 0x3F, 0x9B, 0xF5,
+-  0xB6, 0xF3, 0xF2, 0x5E, 0x74, 0xC9, 0x83, 0x81, 0x47, 0xF0, 0xC5, 0x45,
+-  0x0A, 0xE9, 0x8E, 0x38, 0xD7, 0x18, 0xC6, 0x2A, 0x0F, 0xF8, 0xB7, 0x31,
+-  0xD6, 0x55, 0xE4, 0x66, 0x78, 0x81, 0xD4, 0xE6, 0xDB, 0x9F, 0xBA, 0xE8,
+-  0x23, 0xB5, 0x7F, 0xDC, 0x08, 0xEA, 0xD5, 0x26, 0x1E, 0x20, 0x25, 0x84,
+-  0x26, 0xC6, 0x79, 0xC9, 0x9B, 0x3D, 0x7E, 0xA9
++ 0xC2, 0x80, 0x82, 0x56, 0xD8, 0xA7, 0xB2, 0x9C, 0xF5, 0xD6, 0x3C, 0xE3,
++ 0xBF, 0xE9, 0x3A, 0x53, 0x40, 0xAE, 0xF2, 0xA9, 0x6A, 0x39, 0x49, 0x5B,
++ 0x05, 0x7F, 0x67, 0x38, 0x2E, 0x1D, 0xE1, 0x93, 0x22, 0x65, 0x79, 0x84,
++ 0x68, 0xFA, 0xD8, 0xAF, 0xA1, 0x98, 0x61, 0x6F, 0x44, 0x27, 0xA6, 0x8B,
++ 0xCF, 0x0E, 0x13, 0xA9, 0xCE, 0xD7, 0x6C, 0xD2, 0x38, 0xB5, 0x16, 0xB9,
++ 0x66, 0x94, 0x48, 0xDE, 0x9E, 0x19, 0x3D, 0x6F, 0xB3, 0xA1, 0x9A, 0x19,
++ 0xDF, 0xFB, 0xAB, 0xA5, 0x9F, 0x38, 0xDA, 0xC9, 0x21, 0x8F, 0xCE, 0x98,
++ 0x01, 0x3A, 0xC8, 0xE0, 0xDF, 0xDA, 0xFC, 0xF0, 0xA6, 0x86, 0x29, 0xB5,
++ 0x7F, 0x61, 0xFB, 0xBA, 0xC5, 0x49, 0xB2, 0x7C, 0x6A, 0x26, 0x82, 0xC4,
++ 0x8F, 0xAA, 0x5B, 0x10, 0xD5, 0xEE, 0xA0, 0x55, 0x42, 0xEF, 0x32, 0x5A,
++ 0x3F, 0x55, 0xB3, 0x2C, 0x22, 0xE9, 0x65, 0xDA, 0x8D, 0x0A, 0xB9, 0x70,
++ 0x43, 0xCC, 0x3F, 0x64, 0x9C, 0xB5, 0x65, 0x49, 0xBD, 0x7F, 0x35, 0xC1,
++ 0x20, 0x85, 0x24, 0xFE, 0xAA, 0x6B, 0x37, 0x04, 0xA1, 0x0E, 0x9D, 0x5C,
++ 0xBA, 0x7F, 0x14, 0x69, 0xC5, 0x93, 0xB2, 0x33, 0xC2, 0xC0, 0xC7, 0xDF,
++ 0x7E, 0x9E, 0xA4, 0xB0, 0xA0, 0x64, 0xD2, 0xAC, 0xFC, 0xFD, 0xFD, 0x99,
++ 0x8F, 0x6A, 0x40, 0x26, 0xC1, 0x2E, 0x4E, 0x8B, 0x33, 0xBE, 0xF1, 0x45,
++ 0x59, 0x8F, 0x33, 0x40, 0x1D, 0x2A, 0xD2, 0xF7, 0x50, 0x83, 0x89, 0xCF,
++ 0x94, 0xC6, 0xF8, 0x36, 0xF0, 0x84, 0x0B, 0x85, 0xA5, 0x02, 0xA9, 0x0F,
++ 0x41, 0x7A, 0x77, 0xA3, 0x2F, 0x47, 0x1E, 0x1D, 0xEC, 0xE6, 0xD3, 0x01,
++ 0x1E, 0x6F, 0x7A, 0x96, 0x50, 0x37, 0x37, 0x4B, 0x27, 0x52, 0x0B, 0xDC,
++ 0xDB, 0xC7, 0xA9, 0x31, 0xB2, 0x40, 0xEE, 0x60, 0x41, 0x26, 0x6A, 0x05,
++ 0xCE, 0x08, 0x1D, 0x89
+ };
+ 
+ static const unsigned char kat_RSA_PSS_SHA224[] = {
+-  0x39, 0x4A, 0x6A, 0x20, 0xBC, 0xE9, 0x33, 0xED, 0xEF, 0xC5, 0x58, 0xA7,
+-  0xFE, 0x81, 0xC4, 0x36, 0x50, 0x9A, 0x2C, 0x82, 0x98, 0x08, 0x95, 0xFA,
+-  0xB1, 0x9E, 0xD2, 0x55, 0x61, 0x87, 0x21, 0x59, 0x87, 0x7B, 0x1F, 0x57,
+-  0x30, 0x9D, 0x0D, 0x4A, 0x06, 0xEB, 0x52, 0x37, 0x55, 0x54, 0x1C, 0x89,
+-  0x83, 0x75, 0x59, 0x65, 0x64, 0x90, 0x2E, 0x16, 0xCC, 0x86, 0x05, 0xEE,
+-  0xB1, 0xE6, 0x7B, 0xBA, 0x16, 0x75, 0x0D, 0x0C, 0x64, 0x0B, 0xAB, 0x22,
+-  0x15, 0x78, 0x6B, 0x6F, 0xA4, 0xFB, 0x77, 0x40, 0x64, 0x62, 0xD1, 0xB5,
+-  0x37, 0x1E, 0xE0, 0x3D, 0xA8, 0xF9, 0xD2, 0xBD, 0xAA, 0x38, 0x24, 0x49,
+-  0x58, 0xD2, 0x74, 0x85, 0xF4, 0xB5, 0x93, 0x8E, 0xF5, 0x03, 0xEA, 0x2D,
+-  0xC8, 0x52, 0xFA, 0xCF, 0x7E, 0x35, 0xB0, 0x6A, 0xAF, 0x95, 0xC0, 0x00,
+-  0x54, 0x76, 0x3D, 0x0C, 0x9C, 0xB2, 0xEE, 0xC0
++ 0xB4, 0x01, 0x93, 0x16, 0x05, 0xF6, 0xEB, 0xE2, 0xA4, 0xEB, 0x48, 0xAA,
++ 0x00, 0xF4, 0xA1, 0x99, 0x0A, 0xB4, 0xB6, 0x63, 0xE9, 0x68, 0xCA, 0xB3,
++ 0x13, 0xD7, 0x66, 0x6A, 0xCD, 0xCB, 0x33, 0x9F, 0xE5, 0x84, 0xE2, 0xC3,
++ 0x0B, 0x53, 0xE5, 0x8B, 0x96, 0x4B, 0xDB, 0x2D, 0x80, 0xA4, 0x1D, 0xE3,
++ 0x81, 0xDC, 0x52, 0x99, 0xBA, 0x9B, 0x6A, 0x9D, 0x48, 0x1F, 0x73, 0xF7,
++ 0xAC, 0x09, 0x13, 0xA1, 0x16, 0x2C, 0x60, 0xFB, 0xBC, 0x25, 0xF7, 0x53,
++ 0xD1, 0x04, 0x5A, 0x3F, 0x95, 0x09, 0x5E, 0xE5, 0xA2, 0x7D, 0xFC, 0x2A,
++ 0x51, 0x1D, 0x21, 0xCE, 0x2B, 0x4E, 0x1B, 0xB8, 0xCB, 0xDD, 0x24, 0xEE,
++ 0x99, 0x1D, 0x37, 0xDC, 0xED, 0x5F, 0x2F, 0x48, 0x5E, 0x33, 0x94, 0x06,
++ 0x19, 0xCD, 0x5A, 0x26, 0x85, 0x77, 0x9D, 0xAF, 0x86, 0x97, 0xC9, 0x08,
++ 0xD5, 0x81, 0x0E, 0xB8, 0x9F, 0xB6, 0xAF, 0x20, 0x72, 0xDC, 0x13, 0x4D,
++ 0x7A, 0xE4, 0x5C, 0x81, 0xDE, 0xC0, 0x3D, 0x19, 0x9C, 0x33, 0x11, 0x07,
++ 0xD5, 0xA9, 0x51, 0x67, 0xCD, 0xFD, 0x37, 0x61, 0x14, 0x9F, 0xE7, 0x70,
++ 0x18, 0x32, 0xC3, 0x34, 0x54, 0x0D, 0x4F, 0xB4, 0xAE, 0x9F, 0xEC, 0x64,
++ 0xD8, 0xB2, 0x16, 0xA4, 0xB2, 0x99, 0x92, 0xCB, 0x7F, 0x1F, 0x06, 0x17,
++ 0x5F, 0xA1, 0x07, 0x68, 0xAE, 0xA7, 0x2D, 0x03, 0x91, 0x2A, 0x9D, 0x69,
++ 0xC2, 0x9D, 0x90, 0xF7, 0xF9, 0x66, 0x5D, 0x13, 0xB7, 0x7F, 0xD3, 0x97,
++ 0x45, 0x97, 0x43, 0xD8, 0xCE, 0x3C, 0xF2, 0x98, 0x98, 0xDD, 0xE2, 0x2D,
++ 0xCF, 0xA1, 0xC4, 0x25, 0x46, 0x2E, 0xD2, 0xE5, 0x5F, 0xC6, 0x01, 0xC5,
++ 0x4F, 0x42, 0x2B, 0xDE, 0x0F, 0xEA, 0x4A, 0x4F, 0xC3, 0x5B, 0xDF, 0x9B,
++ 0x5D, 0x30, 0x18, 0x93, 0xD0, 0xDE, 0xC5, 0x09, 0xAA, 0x57, 0x57, 0xBD,
++ 0x2D, 0x84, 0x03, 0xB7
+ };
+ 
+ static const unsigned char kat_RSA_PSS_SHA256[] = {
+-  0x6D, 0x3D, 0xBE, 0x8F, 0x60, 0x6D, 0x25, 0x14, 0xF0, 0x31, 0xE3, 0x89,
+-  0x00, 0x97, 0xFA, 0x99, 0x71, 0x28, 0xE5, 0x10, 0x25, 0x9A, 0xF3, 0x8F,
+-  0x7B, 0xC5, 0xA8, 0x4A, 0x74, 0x51, 0x36, 0xE2, 0x8D, 0x7D, 0x73, 0x28,
+-  0xC1, 0x77, 0xC6, 0x27, 0x97, 0x00, 0x8B, 0x00, 0xA3, 0x96, 0x73, 0x4E,
+-  0x7D, 0x2E, 0x2C, 0x34, 0x68, 0x8C, 0x8E, 0xDF, 0x9D, 0x49, 0x47, 0x05,
+-  0xAB, 0xF5, 0x01, 0xD6, 0x81, 0x47, 0x70, 0xF5, 0x1D, 0x6D, 0x26, 0xBA,
+-  0x2F, 0x7A, 0x54, 0x53, 0x4E, 0xED, 0x71, 0xD9, 0x5A, 0xF3, 0xDA, 0xB6,
+-  0x0B, 0x47, 0x34, 0xAF, 0x90, 0xDC, 0xC8, 0xD9, 0x6F, 0x56, 0xCD, 0x9F,
+-  0x21, 0xB7, 0x7E, 0xAD, 0x7C, 0x2F, 0x75, 0x50, 0x47, 0x12, 0xE4, 0x6D,
+-  0x5F, 0xB7, 0x01, 0xDF, 0xC3, 0x11, 0x6C, 0xA9, 0x9E, 0x49, 0xB9, 0xF6,
+-  0x72, 0xF4, 0xF6, 0xEF, 0x88, 0x1E, 0x2D, 0x1C
++ 0x38, 0xDA, 0x99, 0x51, 0x26, 0x38, 0xC6, 0x7F, 0xC4, 0x81, 0x57, 0x19,
++ 0x35, 0xC6, 0xF6, 0x1E, 0x90, 0x47, 0x20, 0x55, 0x47, 0x56, 0x26, 0xE9,
++ 0xF2, 0xA8, 0x39, 0x6C, 0xD5, 0xCD, 0xCB, 0x55, 0xFC, 0x0C, 0xC5, 0xCB,
++ 0xF7, 0x40, 0x17, 0x3B, 0xCF, 0xE4, 0x05, 0x03, 0x3B, 0xA0, 0xB2, 0xC9,
++ 0x0D, 0x5E, 0x48, 0x3A, 0xE9, 0xAD, 0x28, 0x71, 0x7D, 0x8F, 0x89, 0x16,
++ 0x59, 0x93, 0x35, 0xDC, 0x4D, 0x7B, 0xDF, 0x84, 0xE4, 0x68, 0xAA, 0x33,
++ 0xAA, 0xDC, 0x66, 0x50, 0xC8, 0xA9, 0x32, 0x12, 0xDC, 0xC6, 0x90, 0x49,
++ 0x0B, 0x75, 0xFF, 0x9B, 0x95, 0x00, 0x9A, 0x90, 0xE0, 0xD4, 0x0E, 0x67,
++ 0xAB, 0x3C, 0x47, 0x36, 0xC5, 0x2E, 0x1C, 0x46, 0xF0, 0x2D, 0xD3, 0x8B,
++ 0x42, 0x08, 0xDE, 0x0D, 0xB6, 0x2C, 0x86, 0xB0, 0x35, 0x71, 0x18, 0x6B,
++ 0x89, 0x67, 0xC0, 0x05, 0xAD, 0xF4, 0x1D, 0x62, 0x4E, 0x75, 0xEC, 0xD6,
++ 0xC2, 0xDB, 0x07, 0xB0, 0xB6, 0x8D, 0x15, 0xAD, 0xCD, 0xBF, 0xF5, 0x60,
++ 0x76, 0xAE, 0x48, 0xB8, 0x77, 0x7F, 0xC5, 0x01, 0xD9, 0x29, 0xBB, 0xD6,
++ 0x17, 0xA2, 0x20, 0x5A, 0xC0, 0x4A, 0x3B, 0x34, 0xC8, 0xB9, 0x39, 0xCF,
++ 0x06, 0x89, 0x95, 0x6F, 0xC7, 0xCA, 0xC4, 0xE4, 0x43, 0xDF, 0x5A, 0x23,
++ 0xE2, 0x89, 0xA3, 0x38, 0x78, 0x31, 0x38, 0xC6, 0xA4, 0x6F, 0x5F, 0x73,
++ 0x5A, 0xE5, 0x9E, 0x09, 0xE7, 0x6F, 0xD4, 0xF8, 0x3E, 0xB7, 0xB0, 0x56,
++ 0x9A, 0xF3, 0x65, 0xF0, 0xC2, 0xA6, 0x8A, 0x08, 0xBA, 0x44, 0xAC, 0x97,
++ 0xDE, 0xB4, 0x16, 0x83, 0xDF, 0xE3, 0xEE, 0x71, 0xFA, 0xF9, 0x51, 0x50,
++ 0x14, 0xDC, 0xFD, 0x6A, 0x82, 0x20, 0x68, 0x64, 0x7D, 0x4E, 0x82, 0x68,
++ 0xD7, 0x45, 0xFA, 0x6A, 0xE4, 0xE5, 0x29, 0x3A, 0x70, 0xFB, 0xE4, 0x62,
++ 0x2B, 0x31, 0xB9, 0x7D
+ };
+ 
+ static const unsigned char kat_RSA_PSS_SHA384[] = {
+-  0x40, 0xFB, 0xA1, 0x21, 0xF4, 0xB2, 0x40, 0x9A, 0xB4, 0x31, 0xA8, 0xF2,
+-  0xEC, 0x1C, 0xC4, 0xC8, 0x7C, 0x22, 0x65, 0x9C, 0x57, 0x45, 0xCD, 0x5E,
+-  0x86, 0x00, 0xF7, 0x25, 0x78, 0xDE, 0xDC, 0x7A, 0x71, 0x44, 0x9A, 0xCD,
+-  0xAA, 0x25, 0xF4, 0xB2, 0xFC, 0xF0, 0x75, 0xD9, 0x2F, 0x78, 0x23, 0x7F,
+-  0x6F, 0x02, 0xEF, 0xC1, 0xAF, 0xA6, 0x28, 0x16, 0x31, 0xDC, 0x42, 0x6C,
+-  0xB2, 0x44, 0xE5, 0x4D, 0x66, 0xA2, 0xE6, 0x71, 0xF3, 0xAC, 0x4F, 0xFB,
+-  0x91, 0xCA, 0xF5, 0x70, 0xEF, 0x6B, 0x9D, 0xA4, 0xEF, 0xD9, 0x3D, 0x2F,
+-  0x3A, 0xBE, 0x89, 0x38, 0x59, 0x01, 0xBA, 0xDA, 0x32, 0xAD, 0x42, 0x89,
+-  0x98, 0x8B, 0x39, 0x44, 0xF0, 0xFC, 0x38, 0xAC, 0x87, 0x1F, 0xCA, 0x6F,
+-  0x48, 0xF6, 0xAE, 0xD7, 0x45, 0xEE, 0xAE, 0x88, 0x0E, 0x60, 0xF4, 0x55,
+-  0x48, 0x44, 0xEE, 0x1F, 0x90, 0x18, 0x4B, 0xF1
++ 0x99, 0x02, 0xC9, 0x1E, 0x31, 0x82, 0xB4, 0xE6, 0x1B, 0x32, 0xCE, 0x5D,
++ 0x41, 0x1D, 0x00, 0x2F, 0x04, 0x8B, 0xBD, 0x37, 0x79, 0xCF, 0x77, 0x03,
++ 0x05, 0x6A, 0x21, 0xC7, 0x8D, 0x24, 0x60, 0x49, 0x39, 0x58, 0xC5, 0x27,
++ 0x8F, 0xC5, 0x97, 0x4A, 0xB2, 0xE1, 0xD4, 0x36, 0x57, 0xBD, 0x43, 0xCC,
++ 0x7B, 0xCE, 0xF2, 0xA5, 0x30, 0xF8, 0x72, 0x14, 0xBB, 0xD0, 0x9F, 0xC1,
++ 0x49, 0xC8, 0x1C, 0xAF, 0xCD, 0x95, 0x78, 0x72, 0x25, 0xF9, 0x45, 0xC6,
++ 0x5B, 0x62, 0x5E, 0x01, 0xD7, 0x40, 0x5E, 0xC8, 0xCA, 0x0A, 0xF3, 0xBA,
++ 0x08, 0x07, 0x88, 0xCA, 0x49, 0x36, 0x84, 0x7D, 0xF6, 0xFC, 0x5A, 0xDB,
++ 0xFC, 0x50, 0xD3, 0xEB, 0x3D, 0x83, 0xB0, 0xF5, 0x94, 0x5E, 0x88, 0xC3,
++ 0x82, 0xCD, 0x53, 0x40, 0x96, 0x18, 0x6B, 0x4A, 0x6C, 0x9C, 0xFE, 0xE5,
++ 0x3B, 0x75, 0xF9, 0xEB, 0xA5, 0x77, 0x11, 0xEF, 0x88, 0x1C, 0x25, 0x70,
++ 0x7D, 0x88, 0x5D, 0xC3, 0xCA, 0xE1, 0x49, 0x14, 0x90, 0xAD, 0xF2, 0x5E,
++ 0x49, 0xD7, 0x99, 0xA5, 0x7B, 0x77, 0x3B, 0x8E, 0xB8, 0xDB, 0xF1, 0x4C,
++ 0xD6, 0x9A, 0xDC, 0xE5, 0x7A, 0x1C, 0xE1, 0xCE, 0x9D, 0xF1, 0xF3, 0xA0,
++ 0x0A, 0x35, 0x52, 0x9D, 0xB9, 0x46, 0x94, 0x82, 0x0F, 0xF7, 0xB2, 0x62,
++ 0x51, 0x70, 0x75, 0xD2, 0x37, 0x96, 0x67, 0x2F, 0xD0, 0x22, 0xD8, 0x07,
++ 0x8D, 0x69, 0x9E, 0x6D, 0x0B, 0x40, 0x4F, 0x70, 0xEC, 0x0B, 0xCA, 0x88,
++ 0x80, 0x8D, 0x9A, 0xF4, 0xF9, 0x18, 0x50, 0x27, 0x08, 0xFA, 0xCC, 0xC7,
++ 0x3F, 0xE4, 0x84, 0x83, 0xA1, 0xB6, 0x1D, 0x23, 0x34, 0xFE, 0x48, 0xE5,
++ 0xE3, 0xAE, 0x4D, 0x98, 0xBC, 0xA6, 0x8A, 0x9F, 0xFD, 0x4D, 0xDB, 0x9D,
++ 0xF7, 0xEB, 0x4E, 0xB6, 0x6F, 0x25, 0xEA, 0x7A, 0xE9, 0x85, 0xB2, 0xEF,
++ 0x90, 0xD2, 0xA6, 0x2B
+ };
+ 
+ static const unsigned char kat_RSA_PSS_SHA512[] = {
+-  0x07, 0x1E, 0xD8, 0xD5, 0x05, 0xE8, 0xE6, 0xE6, 0x57, 0xAE, 0x63, 0x8C,
+-  0xC6, 0x83, 0xB7, 0xA0, 0x59, 0xBB, 0xF2, 0xC6, 0x8F, 0x12, 0x53, 0x9A,
+-  0x9B, 0x54, 0x9E, 0xB3, 0xC1, 0x1D, 0x23, 0x4D, 0x51, 0xED, 0x9E, 0xDD,
+-  0x4B, 0xF3, 0x46, 0x9B, 0x6B, 0xF6, 0x7C, 0x24, 0x60, 0x79, 0x23, 0x39,
+-  0x01, 0x1C, 0x51, 0xCB, 0xD8, 0xE9, 0x9A, 0x01, 0x67, 0x5F, 0xFE, 0xD7,
+-  0x7C, 0xE3, 0x7F, 0xED, 0xDB, 0x87, 0xBB, 0xF0, 0x3D, 0x78, 0x55, 0x61,
+-  0x57, 0xE3, 0x0F, 0xE3, 0xD2, 0x9D, 0x0C, 0x2A, 0x20, 0xB0, 0x85, 0x13,
+-  0xC5, 0x47, 0x34, 0x0D, 0x32, 0x15, 0xC8, 0xAE, 0x9A, 0x6A, 0x39, 0x63,
+-  0x2D, 0x60, 0xF5, 0x4C, 0xDF, 0x8A, 0x48, 0x4B, 0xBF, 0xF4, 0xA8, 0xFE,
+-  0x76, 0xF2, 0x32, 0x1B, 0x9C, 0x7C, 0xCA, 0xFE, 0x7F, 0x80, 0xC2, 0x88,
+-  0x5C, 0x97, 0x70, 0xB4, 0x26, 0xC9, 0x14, 0x8B
++ 0x3F, 0x83, 0x43, 0x78, 0x25, 0xBE, 0x81, 0xB2, 0x6E, 0x78, 0x11, 0x32,
++ 0xD0, 0x88, 0x05, 0x53, 0x95, 0xED, 0x81, 0x12, 0xCE, 0x50, 0xD9, 0x06,
++ 0x42, 0x89, 0xA0, 0x55, 0x7A, 0x05, 0x13, 0x94, 0x35, 0x9B, 0xCA, 0x5D,
++ 0xCB, 0xB2, 0x32, 0xE1, 0x04, 0x99, 0xEC, 0xE7, 0xA6, 0x69, 0x4D, 0x2B,
++ 0xC1, 0x57, 0x13, 0x48, 0x0D, 0x6B, 0x4D, 0x83, 0x28, 0x06, 0x79, 0x9D,
++ 0xB4, 0x70, 0xCE, 0xC0, 0xFC, 0x3B, 0x69, 0xB3, 0x91, 0x54, 0xA9, 0x44,
++ 0x2E, 0xDA, 0x4A, 0xC5, 0xC2, 0x99, 0xF0, 0xDE, 0xCA, 0x77, 0x99, 0x6B,
++ 0x0C, 0x79, 0xE5, 0x29, 0x74, 0x83, 0x69, 0xEA, 0xB8, 0x72, 0x30, 0x3D,
++ 0x7A, 0x30, 0xE1, 0x03, 0x7B, 0x09, 0xE6, 0x11, 0xC0, 0xDC, 0xFF, 0xFD,
++ 0xBD, 0xEC, 0x9C, 0xCC, 0x46, 0x7B, 0x4C, 0x4C, 0x59, 0xBE, 0x82, 0x7C,
++ 0xF5, 0x60, 0x5A, 0xC3, 0xE8, 0xA8, 0x8A, 0x38, 0x9E, 0x01, 0x57, 0xF1,
++ 0x79, 0x3A, 0x7C, 0xA3, 0x9F, 0x12, 0x1A, 0x4F, 0x2E, 0xA2, 0xE5, 0x0A,
++ 0xAB, 0xC0, 0xF4, 0xA5, 0xE3, 0x5F, 0x89, 0x1C, 0x8F, 0xA4, 0x5E, 0xCE,
++ 0x0D, 0x91, 0x05, 0x1B, 0x17, 0x62, 0x48, 0xFE, 0xA5, 0x4C, 0xEF, 0x2D,
++ 0x28, 0xF1, 0x5E, 0xE6, 0xD1, 0x30, 0x89, 0x0A, 0xAD, 0x18, 0xAF, 0x6F,
++ 0x04, 0x09, 0x36, 0x9A, 0xFF, 0xCA, 0xA1, 0xA7, 0x05, 0x7F, 0xD4, 0xBF,
++ 0x3A, 0xB5, 0x42, 0x6D, 0xE9, 0x07, 0x29, 0x65, 0x8B, 0xAD, 0x4D, 0x0F,
++ 0x22, 0xE1, 0x59, 0x43, 0x68, 0x87, 0xA8, 0x8B, 0xBC, 0x69, 0xA1, 0x94,
++ 0x22, 0x3E, 0x8A, 0x49, 0xE8, 0xA3, 0x6F, 0xC2, 0x93, 0x58, 0xE7, 0xAE,
++ 0xC9, 0x1F, 0xCF, 0x61, 0x93, 0xFC, 0xC1, 0xF6, 0xF3, 0x27, 0x7F, 0x0A,
++ 0x90, 0xE0, 0x65, 0x32, 0x57, 0x47, 0xE2, 0xED, 0x08, 0x59, 0xA6, 0xF0,
++ 0x17, 0x2C, 0x13, 0xE0
+ };
+ 
+ static const unsigned char kat_RSA_SHA1[] = {
+-  0x71, 0xEE, 0x1A, 0xC0, 0xFE, 0x01, 0x93, 0x54, 0x79, 0x5C, 0xF2, 0x4C,
+-  0x4A, 0xFD, 0x1A, 0x05, 0x8F, 0x64, 0xB1, 0x6D, 0x61, 0x33, 0x8D, 0x9B,
+-  0xE7, 0xFD, 0x60, 0xA3, 0x83, 0xB5, 0xA3, 0x51, 0x55, 0x77, 0x90, 0xCF,
+-  0xDC, 0x22, 0x37, 0x8E, 0xD0, 0xE1, 0xAE, 0x09, 0xE3, 0x3D, 0x1E, 0xF8,
+-  0x80, 0xD1, 0x8B, 0xC2, 0xEC, 0x0A, 0xD7, 0x6B, 0x88, 0x8B, 0x8B, 0xA1,
+-  0x20, 0x22, 0xBE, 0x59, 0x5B, 0xE0, 0x23, 0x24, 0xA1, 0x49, 0x30, 0xBA,
+-  0xA9, 0x9E, 0xE8, 0xB1, 0x8A, 0x62, 0x16, 0xBF, 0x4E, 0xCA, 0x2E, 0x4E,
+-  0xBC, 0x29, 0xA8, 0x67, 0x13, 0xB7, 0x9F, 0x1D, 0x04, 0x44, 0xE5, 0x5F,
+-  0x35, 0x07, 0x11, 0xBC, 0xED, 0x19, 0x37, 0x21, 0xCF, 0x23, 0x48, 0x1F,
+-  0x72, 0x05, 0xDE, 0xE6, 0xE8, 0x7F, 0x33, 0x8A, 0x76, 0x4B, 0x2F, 0x95,
+-  0xDF, 0xF1, 0x5F, 0x84, 0x80, 0xD9, 0x46, 0xB4
++ 0x3B, 0x60, 0x4B, 0xFC, 0x54, 0x28, 0x23, 0xE6, 0x2F, 0x05, 0x04, 0xBA,
++ 0x9D, 0xE4, 0x3C, 0xB8, 0x5B, 0x60, 0x5C, 0xCD, 0x9D, 0xEA, 0xC3, 0x4C,
++ 0xC2, 0x33, 0xE6, 0xC6, 0x21, 0x48, 0x76, 0xEC, 0xB2, 0xF5, 0x11, 0xDE,
++ 0x44, 0xB4, 0xAF, 0x16, 0x11, 0xC3, 0x18, 0x16, 0xB3, 0x69, 0xBB, 0x94,
++ 0xED, 0xE8, 0xB3, 0x9E, 0xB1, 0x43, 0x8E, 0xCE, 0xB4, 0x34, 0x9B, 0x08,
++ 0x22, 0xAF, 0x31, 0x73, 0xB5, 0xFA, 0x11, 0x7E, 0x8F, 0x13, 0x52, 0xEC,
++ 0xC9, 0x03, 0xEE, 0x0D, 0x2B, 0x91, 0x32, 0xF2, 0x8E, 0xDF, 0x02, 0xE0,
++ 0x0A, 0x47, 0xD2, 0x0A, 0x51, 0x00, 0x1A, 0x30, 0x6F, 0x0C, 0xB3, 0x54,
++ 0x64, 0x20, 0x90, 0x0C, 0x01, 0xBE, 0xC0, 0x42, 0x8C, 0x5D, 0x18, 0x6F,
++ 0x32, 0x75, 0x45, 0x7B, 0x1C, 0x04, 0xA2, 0x9F, 0x84, 0xD7, 0xF5, 0x3A,
++ 0x95, 0xD4, 0xE8, 0x8D, 0xEC, 0x99, 0xEF, 0x18, 0x5E, 0x64, 0xD3, 0xAF,
++ 0xF8, 0xD4, 0xFF, 0x3C, 0x87, 0xA0, 0x3F, 0xC7, 0x22, 0x05, 0xFD, 0xFD,
++ 0x29, 0x8A, 0x28, 0xDA, 0xA9, 0x8A, 0x8B, 0x23, 0x62, 0x9D, 0x42, 0xB8,
++ 0x4A, 0x76, 0x0D, 0x9F, 0x9A, 0xE0, 0xE6, 0xDD, 0xAD, 0x5E, 0x5F, 0xD5,
++ 0x32, 0xE9, 0x4B, 0x97, 0x7D, 0x62, 0x0A, 0xB3, 0xBE, 0xF2, 0x8C, 0x1F,
++ 0x2B, 0x22, 0x06, 0x15, 0x33, 0x71, 0xED, 0x9B, 0xA0, 0x82, 0xCE, 0xBF,
++ 0x3B, 0x08, 0x5F, 0xA7, 0x20, 0x94, 0x09, 0xEB, 0x82, 0xA5, 0x41, 0x60,
++ 0xF1, 0x08, 0xEB, 0x8D, 0xCC, 0x8D, 0xC9, 0x52, 0x0A, 0xAF, 0xF4, 0xF9,
++ 0x9F, 0x82, 0xD8, 0x0B, 0x75, 0x5E, 0xE4, 0xAF, 0x65, 0x96, 0xAF, 0xFC,
++ 0x33, 0xBF, 0x9F, 0x3E, 0xA4, 0x7B, 0x86, 0xC7, 0xF7, 0x47, 0xAB, 0x37,
++ 0x05, 0xD6, 0x0D, 0x31, 0x72, 0x8C, 0x80, 0x1E, 0xA9, 0x54, 0xFC, 0xDF,
++ 0x27, 0x90, 0xE2, 0x01
+ };
+ 
+ static const unsigned char kat_RSA_SHA224[] = {
+-  0x62, 0xAA, 0x79, 0xA9, 0x18, 0x0E, 0x5F, 0x8C, 0xBB, 0xB7, 0x15, 0xF9,
+-  0x25, 0xBB, 0xFA, 0xD4, 0x3A, 0x34, 0xED, 0x9E, 0xA0, 0xA9, 0x18, 0x8D,
+-  0x5B, 0x55, 0x9A, 0x7E, 0x1E, 0x08, 0x08, 0x60, 0xC5, 0x1A, 0xC5, 0x89,
+-  0x08, 0xE2, 0x1B, 0xBD, 0x62, 0x50, 0x17, 0x76, 0x30, 0x2C, 0x9E, 0xCD,
+-  0xA4, 0x02, 0xAD, 0xB1, 0x6D, 0x44, 0x6D, 0xD5, 0xC6, 0x45, 0x41, 0xE5,
+-  0xEE, 0x1F, 0x8D, 0x7E, 0x08, 0x16, 0xA6, 0xE1, 0x5E, 0x0B, 0xA9, 0xCC,
+-  0xDB, 0x59, 0x55, 0x87, 0x09, 0x25, 0x70, 0x86, 0x84, 0x02, 0xC6, 0x3B,
+-  0x0B, 0x44, 0x4C, 0x46, 0x95, 0xF4, 0xF8, 0x5A, 0x91, 0x28, 0x3E, 0xB2,
+-  0x58, 0x2E, 0x06, 0x45, 0x49, 0xE0, 0x92, 0xE2, 0xC0, 0x66, 0xE6, 0x35,
+-  0xD9, 0x79, 0x7F, 0x17, 0x5E, 0x02, 0x73, 0x04, 0x77, 0x82, 0xE6, 0xDC,
+-  0x40, 0x21, 0x89, 0x8B, 0x37, 0x3E, 0x1E, 0x8D
++ 0xA2, 0xD8, 0x42, 0x53, 0xDD, 0xBF, 0x1F, 0x6B, 0x07, 0xE0, 0x60, 0x86,
++ 0x5A, 0x60, 0x06, 0x8F, 0x44, 0xD9, 0xB0, 0x4A, 0xAA, 0x90, 0x71, 0xB8,
++ 0xB2, 0xBC, 0x30, 0x41, 0x50, 0xBB, 0xFD, 0x46, 0x98, 0x4D, 0xC0, 0x89,
++ 0x57, 0x85, 0x8A, 0x97, 0x49, 0x25, 0xA8, 0x0C, 0x69, 0x70, 0x19, 0x39,
++ 0x66, 0x24, 0xB4, 0x69, 0x47, 0xD2, 0x7C, 0xDE, 0x2D, 0x37, 0x59, 0xB3,
++ 0xE3, 0xC7, 0x6B, 0xDD, 0xBE, 0xE1, 0xE6, 0x28, 0x9A, 0x8D, 0x42, 0x3E,
++ 0x28, 0x01, 0xD7, 0x03, 0xC9, 0x73, 0xC3, 0x6B, 0x03, 0xEC, 0x1E, 0xF8,
++ 0x53, 0x8B, 0x52, 0x42, 0x89, 0x55, 0xB7, 0x87, 0xA9, 0x94, 0xC2, 0xB4,
++ 0x4B, 0x76, 0xF5, 0x61, 0x47, 0xE1, 0x44, 0x7B, 0xEC, 0xB4, 0x25, 0x66,
++ 0xC0, 0xFF, 0xEB, 0x86, 0x24, 0xAA, 0xA8, 0x72, 0xC7, 0xFB, 0xFB, 0xF6,
++ 0x84, 0xA7, 0x5B, 0xD4, 0x87, 0xE5, 0x84, 0x56, 0x1E, 0x4C, 0xE5, 0xBC,
++ 0x87, 0x94, 0xAC, 0x9C, 0x1B, 0x3D, 0xF7, 0xD4, 0x36, 0x85, 0x9F, 0xC9,
++ 0xF6, 0x43, 0x3F, 0xB6, 0x25, 0x33, 0x48, 0x0F, 0xE5, 0x7C, 0xCD, 0x53,
++ 0x48, 0xEB, 0x02, 0x11, 0xB9, 0x9E, 0xC3, 0xB4, 0xE1, 0x54, 0xD6, 0xAA,
++ 0x1A, 0x9E, 0x10, 0xE1, 0x27, 0x25, 0xF2, 0xE1, 0xAB, 0xAB, 0x6C, 0x45,
++ 0x61, 0xD5, 0xA3, 0x6C, 0xB6, 0x33, 0x52, 0xAE, 0x3D, 0xFD, 0x22, 0xFC,
++ 0x3A, 0xAB, 0x63, 0x94, 0xB5, 0x3A, 0x69, 0x11, 0xAC, 0x99, 0x4F, 0x33,
++ 0x67, 0x0A, 0x1A, 0x70, 0x1E, 0xB9, 0xE2, 0x26, 0x27, 0x68, 0xEA, 0xF5,
++ 0x97, 0x55, 0xAC, 0x83, 0x6A, 0x40, 0x3B, 0x56, 0xAE, 0x13, 0x88, 0xE8,
++ 0x98, 0x72, 0x52, 0x91, 0x7F, 0x78, 0x0A, 0x18, 0xD4, 0x44, 0x78, 0x83,
++ 0x0D, 0x44, 0x77, 0xA6, 0xF3, 0x04, 0xF1, 0x8C, 0xBC, 0x2F, 0xF9, 0x5B,
++ 0xDB, 0x70, 0x00, 0xF6
+ };
+ 
+ static const unsigned char kat_RSA_SHA256[] = {
+-  0x0D, 0x55, 0xE2, 0xAA, 0x81, 0xDB, 0x8E, 0x82, 0x05, 0x17, 0xA5, 0x23,
+-  0xE7, 0x3B, 0x1D, 0xAF, 0xFB, 0x8C, 0xD0, 0x81, 0x20, 0x7B, 0xAA, 0x23,
+-  0x92, 0x87, 0x8C, 0xD1, 0x53, 0x85, 0x16, 0xDC, 0xBE, 0xAD, 0x6F, 0x35,
+-  0x98, 0x2D, 0x69, 0x84, 0xBF, 0xD9, 0x8A, 0x01, 0x17, 0x58, 0xB2, 0x6E,
+-  0x2C, 0x44, 0x9B, 0x90, 0xF1, 0xFB, 0x51, 0xE8, 0x6A, 0x90, 0x2D, 0x18,
+-  0x0E, 0xC0, 0x90, 0x10, 0x24, 0xA9, 0x1D, 0xB3, 0x58, 0x7A, 0x91, 0x30,
+-  0xBE, 0x22, 0xC7, 0xD3, 0xEC, 0xC3, 0x09, 0x5D, 0xBF, 0xE2, 0x80, 0x3A,
+-  0x7C, 0x85, 0xB4, 0xBC, 0xD1, 0xE9, 0xF0, 0x5C, 0xDE, 0x81, 0xA6, 0x38,
+-  0xB8, 0x42, 0xBB, 0x86, 0xC5, 0x9D, 0xCE, 0x7C, 0x2C, 0xEE, 0xD1, 0xDA,
+-  0x27, 0x48, 0x2B, 0xF5, 0xAB, 0xB9, 0xF7, 0x80, 0xD1, 0x90, 0x27, 0x90,
+-  0xBD, 0x44, 0x97, 0x60, 0xCD, 0x57, 0xC0, 0x7A
++ 0xC2, 0xB1, 0x97, 0x00, 0x9A, 0xE5, 0x80, 0x6A, 0xE2, 0x51, 0x68, 0xB9,
++ 0x7A, 0x0C, 0xF2, 0xB4, 0x77, 0xED, 0x15, 0x0C, 0x4E, 0xE1, 0xDC, 0xFF,
++ 0x8E, 0xBC, 0xDE, 0xC7, 0x9A, 0x96, 0xF1, 0x47, 0x45, 0x24, 0x9D, 0x6F,
++ 0xA6, 0xF3, 0x1D, 0x0D, 0x35, 0x4C, 0x1A, 0xF3, 0x58, 0x2C, 0x6C, 0x06,
++ 0xD6, 0x22, 0x37, 0x77, 0x8C, 0x33, 0xE5, 0x07, 0x53, 0x93, 0x28, 0xCF,
++ 0x67, 0xFA, 0xC4, 0x1F, 0x1B, 0x24, 0xDB, 0x4C, 0xC5, 0x2A, 0x51, 0xA2,
++ 0x60, 0x15, 0x8C, 0x54, 0xB4, 0x30, 0xE2, 0x24, 0x47, 0x86, 0xF2, 0xF8,
++ 0x6C, 0xD6, 0x12, 0x59, 0x2C, 0x74, 0x9A, 0x37, 0xF3, 0xC4, 0xA2, 0xD5,
++ 0x4E, 0x1F, 0x77, 0xF0, 0x27, 0xCE, 0x77, 0xF8, 0x4A, 0x79, 0x03, 0xBE,
++ 0xC8, 0x06, 0x2D, 0xA7, 0xA6, 0x46, 0xF5, 0x55, 0x79, 0xD7, 0x5C, 0xC6,
++ 0x5B, 0xB1, 0x00, 0x4E, 0x7C, 0xD9, 0x11, 0x85, 0xE0, 0xB1, 0x4D, 0x2D,
++ 0x13, 0xD7, 0xAC, 0xEA, 0x64, 0xD1, 0xAC, 0x8F, 0x8D, 0x8F, 0xEA, 0x42,
++ 0x7F, 0xF9, 0xB7, 0x7D, 0x2C, 0x68, 0x49, 0x07, 0x7A, 0x74, 0xEF, 0xB4,
++ 0xC9, 0x97, 0x16, 0x5C, 0x6C, 0x6E, 0x5C, 0x09, 0x2E, 0x8E, 0x13, 0x2E,
++ 0x1A, 0x8D, 0xA6, 0x0C, 0x6E, 0x0C, 0x1C, 0x0F, 0xCC, 0xB2, 0x78, 0x8A,
++ 0x07, 0xFC, 0x5C, 0xC2, 0xF5, 0x65, 0xEC, 0xAB, 0x8B, 0x3C, 0xCA, 0x91,
++ 0x6F, 0x84, 0x7C, 0x21, 0x0E, 0xB8, 0xDA, 0x7B, 0x6C, 0xF7, 0xDF, 0xAB,
++ 0x7E, 0x15, 0xFD, 0x85, 0x0B, 0x33, 0x9B, 0x6A, 0x3A, 0xC3, 0xEF, 0x65,
++ 0x04, 0x6E, 0xB2, 0xAC, 0x98, 0xFD, 0xEB, 0x02, 0xF5, 0xC0, 0x0B, 0x5E,
++ 0xCB, 0xD4, 0x83, 0x82, 0x18, 0x1B, 0xDA, 0xB4, 0xCD, 0xE8, 0x71, 0x6B,
++ 0x1D, 0xB5, 0x4F, 0xE9, 0xD6, 0x43, 0xA0, 0x0A, 0x14, 0xA0, 0xE7, 0x5D,
++ 0x47, 0x9D, 0x18, 0xD7
+ };
+ 
+ static const unsigned char kat_RSA_SHA384[] = {
+-  0x1D, 0xE3, 0x6A, 0xDD, 0x27, 0x4C, 0xC0, 0xA5, 0x27, 0xEF, 0xE6, 0x1F,
+-  0xD2, 0x91, 0x68, 0x59, 0x04, 0xAE, 0xBD, 0x99, 0x63, 0x56, 0x47, 0xC7,
+-  0x6F, 0x22, 0x16, 0x48, 0xD0, 0xF9, 0x18, 0xA9, 0xCA, 0xFA, 0x5D, 0x5C,
+-  0xA7, 0x65, 0x52, 0x8A, 0xC8, 0x44, 0x7E, 0x86, 0x5D, 0xA9, 0xA6, 0x55,
+-  0x65, 0x3E, 0xD9, 0x2D, 0x02, 0x38, 0xA8, 0x79, 0x28, 0x7F, 0xB6, 0xCF,
+-  0x82, 0xDD, 0x7E, 0x55, 0xE1, 0xB1, 0xBC, 0xE2, 0x19, 0x2B, 0x30, 0xC2,
+-  0x1B, 0x2B, 0xB0, 0x82, 0x46, 0xAC, 0x4B, 0xD1, 0xE2, 0x7D, 0xEB, 0x8C,
+-  0xFF, 0x95, 0xE9, 0x6A, 0x1C, 0x3D, 0x4D, 0xBF, 0x8F, 0x8B, 0x9C, 0xCD,
+-  0xEA, 0x85, 0xEE, 0x00, 0xDC, 0x1C, 0xA7, 0xEB, 0xD0, 0x8F, 0x99, 0xF1,
+-  0x16, 0x28, 0x24, 0x64, 0x04, 0x39, 0x2D, 0x58, 0x1E, 0x37, 0xDC, 0x04,
+-  0xBD, 0x31, 0xA2, 0x2F, 0xB3, 0x35, 0x56, 0xBF
++ 0x11, 0x5E, 0x63, 0xFE, 0x47, 0xAA, 0x6A, 0x84, 0xEB, 0x44, 0x9A, 0x00,
++ 0x96, 0x4A, 0xED, 0xD2, 0xA7, 0x67, 0x3A, 0x64, 0x82, 0x30, 0x61, 0x2D,
++ 0xE3, 0xF5, 0x49, 0x68, 0x5E, 0x60, 0xD2, 0x4D, 0xEF, 0xF2, 0xA4, 0xB2,
++ 0x9A, 0x81, 0x1D, 0x41, 0xA5, 0x73, 0x59, 0xEB, 0xBB, 0xC4, 0x9E, 0x2B,
++ 0xEB, 0xC3, 0xDE, 0x3A, 0xEA, 0xF5, 0xAD, 0xDA, 0x87, 0x08, 0x68, 0xCF,
++ 0x12, 0x9B, 0xC1, 0xE4, 0xA7, 0x71, 0xF8, 0xBD, 0x6B, 0x6F, 0x50, 0xF1,
++ 0xD1, 0xFF, 0xCE, 0x6C, 0xD9, 0xBE, 0xDA, 0x76, 0xF3, 0xEB, 0xAB, 0x9C,
++ 0x41, 0x6E, 0x4F, 0x35, 0x7A, 0x61, 0x27, 0xBC, 0x03, 0x3E, 0xAE, 0x3E,
++ 0x1B, 0xDD, 0xAC, 0xD9, 0x1A, 0xFF, 0xD3, 0xF5, 0x66, 0x43, 0x07, 0x76,
++ 0x8A, 0x69, 0x2D, 0x14, 0xB1, 0xBE, 0x55, 0x49, 0x90, 0x89, 0x4B, 0xC4,
++ 0x11, 0x67, 0xD5, 0x9D, 0xB0, 0xB2, 0xEE, 0x8D, 0x0A, 0x47, 0x4A, 0xD9,
++ 0x0E, 0xD1, 0x24, 0xF0, 0x30, 0x2B, 0xF2, 0x79, 0x47, 0xDB, 0x70, 0xB4,
++ 0x46, 0xF2, 0xF8, 0xB7, 0xB4, 0xF6, 0x34, 0x79, 0xA8, 0x2D, 0x3D, 0x56,
++ 0xD5, 0x9A, 0x60, 0x7A, 0x04, 0xC7, 0x66, 0x1D, 0xCD, 0x3C, 0xD5, 0x39,
++ 0x37, 0x12, 0x51, 0x5E, 0x9F, 0xF8, 0x1A, 0xAF, 0x13, 0xC1, 0x13, 0x00,
++ 0x35, 0xD5, 0x8D, 0x17, 0xE3, 0x02, 0x28, 0xD9, 0xEC, 0xDE, 0xD1, 0x2F,
++ 0x93, 0x49, 0x03, 0x11, 0x3E, 0x56, 0x9D, 0xC2, 0x31, 0xF8, 0xAF, 0x2D,
++ 0xD9, 0x99, 0xB7, 0x8A, 0xAC, 0x5A, 0x86, 0x20, 0x3A, 0x83, 0x29, 0x26,
++ 0x9D, 0x03, 0x52, 0x2B, 0x34, 0x56, 0x40, 0x16, 0x53, 0x50, 0x82, 0xC9,
++ 0xC7, 0xD5, 0x51, 0x4C, 0xED, 0xB3, 0xE2, 0xE1, 0xCF, 0xA8, 0xCE, 0xBD,
++ 0xB1, 0x48, 0xA6, 0x8A, 0x79, 0x17, 0x55, 0x11, 0xEF, 0xE8, 0x14, 0xF4,
++ 0x7E, 0x37, 0x1D, 0x96
+ };
+ 
+ static const unsigned char kat_RSA_SHA512[] = {
+-  0x69, 0x52, 0x1B, 0x51, 0x5E, 0x06, 0xCA, 0x9B, 0x16, 0x51, 0x5D, 0xCF,
+-  0x49, 0x25, 0x4A, 0xA1, 0x6A, 0x77, 0x4C, 0x36, 0x40, 0xF8, 0xB2, 0x9A,
+-  0x15, 0xEA, 0x5C, 0xE5, 0xE6, 0x82, 0xE0, 0x86, 0x82, 0x6B, 0x32, 0xF1,
+-  0x04, 0xC1, 0x5A, 0x1A, 0xED, 0x1E, 0x9A, 0xB6, 0x4C, 0x54, 0x9F, 0xD8,
+-  0x8D, 0xCC, 0xAC, 0x8A, 0xBB, 0x9C, 0x82, 0x3F, 0xA6, 0x53, 0x62, 0xB5,
+-  0x80, 0xE2, 0xBC, 0xDD, 0x67, 0x2B, 0xD9, 0x3F, 0xE4, 0x75, 0x92, 0x6B,
+-  0xAF, 0x62, 0x7C, 0x52, 0xF0, 0xEE, 0x33, 0xDF, 0x1B, 0x1D, 0x47, 0xE6,
+-  0x59, 0x56, 0xA5, 0xB9, 0x5C, 0xE6, 0x77, 0x78, 0x16, 0x63, 0x84, 0x05,
+-  0x6F, 0x0E, 0x2B, 0x31, 0x9D, 0xF7, 0x7F, 0xB2, 0x64, 0x71, 0xE0, 0x2D,
+-  0x3E, 0x62, 0xCE, 0xB5, 0x3F, 0x88, 0xDF, 0x2D, 0xAB, 0x98, 0x65, 0x91,
+-  0xDF, 0x70, 0x14, 0xA5, 0x3F, 0x36, 0xAB, 0x84
++ 0x35, 0x6D, 0xF1, 0x9E, 0xCF, 0xB1, 0xF6, 0x0C, 0x04, 0x21, 0x17, 0xB3,
++ 0xC4, 0x9D, 0xFE, 0x62, 0x1C, 0x1A, 0x45, 0x00, 0x2E, 0x6B, 0xB6, 0x9F,
++ 0x5C, 0xB1, 0xCB, 0xCF, 0xF9, 0x67, 0xEA, 0x62, 0x8A, 0xEB, 0x77, 0x02,
++ 0x42, 0x30, 0x88, 0xB1, 0x48, 0xDF, 0x12, 0x60, 0x6E, 0x92, 0xBB, 0x4B,
++ 0x09, 0x68, 0xD1, 0x70, 0x2B, 0x59, 0xEE, 0x57, 0x96, 0xF9, 0xEA, 0xA3,
++ 0x4C, 0xE9, 0xC9, 0xBD, 0x25, 0x34, 0x66, 0x15, 0x6C, 0xC9, 0x81, 0xD1,
++ 0x48, 0x0F, 0x33, 0x5F, 0x05, 0x4F, 0xC2, 0xC4, 0xDD, 0x09, 0x54, 0x79,
++ 0xA1, 0x57, 0x07, 0x70, 0xA0, 0x33, 0x02, 0x4D, 0x5D, 0xE9, 0x24, 0xD1,
++ 0xEF, 0xF0, 0x61, 0xD0, 0x1D, 0x41, 0xE2, 0x9B, 0x2B, 0x7C, 0xD0, 0x4E,
++ 0x55, 0xD9, 0x6D, 0xA1, 0x16, 0x9F, 0xDA, 0xC3, 0x3B, 0xF1, 0x74, 0xD1,
++ 0x99, 0xF1, 0x63, 0x57, 0xAD, 0xC7, 0x55, 0xF4, 0x97, 0x43, 0x1C, 0xED,
++ 0x1B, 0x7A, 0x32, 0xCB, 0x24, 0xA6, 0x3D, 0x93, 0x37, 0x90, 0x74, 0xEE,
++ 0xD2, 0x8D, 0x4B, 0xBC, 0x72, 0xDA, 0x25, 0x2B, 0x64, 0xE9, 0xCA, 0x69,
++ 0x36, 0xB6, 0xEC, 0x6E, 0x8F, 0x33, 0x0E, 0x74, 0x40, 0x48, 0x51, 0xE2,
++ 0x54, 0x6F, 0xAF, 0x6E, 0x36, 0x54, 0x3A, 0xEC, 0x78, 0x37, 0xE6, 0x1F,
++ 0x76, 0xA5, 0x4D, 0xA6, 0xD9, 0xB3, 0x6B, 0x17, 0x6D, 0x61, 0xFC, 0xA3,
++ 0x85, 0x4A, 0xCC, 0xDA, 0x52, 0xAC, 0x5B, 0xDA, 0x51, 0xE5, 0x7F, 0x5B,
++ 0x52, 0x8B, 0x74, 0x75, 0x99, 0x5C, 0x01, 0xFD, 0x25, 0x3E, 0xCD, 0x86,
++ 0x6F, 0x7A, 0xC0, 0xD8, 0x17, 0x6F, 0xD1, 0xD2, 0x6B, 0xAB, 0x14, 0x1F,
++ 0x3B, 0xB8, 0x15, 0x05, 0x86, 0x40, 0x36, 0xCF, 0xDA, 0x59, 0x2B, 0x9A,
++ 0xE9, 0x1E, 0x6E, 0xD3, 0x6B, 0xA1, 0x19, 0xC5, 0xE6, 0x3F, 0xE9, 0x2E,
++ 0x43, 0xA8, 0x34, 0x0A
  };
  
+-static const unsigned char kat_RSA_X931_SHA1[] = {
+-  0x86, 0xB4, 0x18, 0xBA, 0xD1, 0x80, 0xB6, 0x7C, 0x42, 0x45, 0x4D, 0xDF,
+-  0xE9, 0x2D, 0xE1, 0x83, 0x5F, 0xB5, 0x2F, 0xC9, 0xCD, 0xC4, 0xB2, 0x75,
+-  0x80, 0xA4, 0xF1, 0x4A, 0xE7, 0x83, 0x12, 0x1E, 0x1E, 0x14, 0xB8, 0xAC,
+-  0x35, 0xE2, 0xAA, 0x0B, 0x5C, 0xF8, 0x38, 0x4D, 0x04, 0xEE, 0xA9, 0x97,
+-  0x70, 0xFB, 0x5E, 0xE7, 0xB7, 0xE3, 0x62, 0x23, 0x4B, 0x38, 0xBE, 0xD6,
+-  0x53, 0x15, 0xF7, 0xDF, 0x87, 0xB4, 0x0E, 0xCC, 0xB1, 0x1A, 0x11, 0x19,
+-  0xEE, 0x51, 0xCC, 0x92, 0xDD, 0xBC, 0x63, 0x29, 0x63, 0x0C, 0x59, 0xD7,
+-  0x6F, 0x4C, 0x3C, 0x37, 0x5B, 0x37, 0x03, 0x61, 0x7D, 0x24, 0x1C, 0x99,
+-  0x48, 0xAF, 0x82, 0xFE, 0x32, 0x41, 0x9B, 0xB2, 0xDB, 0xEA, 0xED, 0x76,
+-  0x8E, 0x6E, 0xCA, 0x7E, 0x4E, 0x14, 0xBA, 0x30, 0x84, 0x1C, 0xB3, 0x67,
+-  0xA3, 0x29, 0x80, 0x70, 0x54, 0x68, 0x7D, 0x49
+-};
 +static int fips_rsa_encrypt_test(RSA *rsa, const unsigned char *plaintext, int ptlen)
 +	{
 +	unsigned char *ctbuf = NULL, *ptbuf = NULL;
 +	int ret = 0;
 +	int len;
-+
+ 
+-static const unsigned char kat_RSA_X931_SHA256[] = {
+-  0x7E, 0xA2, 0x77, 0xFE, 0xB8, 0x54, 0x8A, 0xC7, 0x7F, 0x64, 0x54, 0x89,
+-  0xE5, 0x52, 0x15, 0x8E, 0x52, 0x96, 0x4E, 0xA6, 0x58, 0x92, 0x1C, 0xDD,
+-  0xEA, 0xA2, 0x2D, 0x5C, 0xD1, 0x62, 0x00, 0x49, 0x05, 0x95, 0x73, 0xCF,
+-  0x16, 0x76, 0x68, 0xF6, 0xC6, 0x5E, 0x80, 0xB8, 0xB8, 0x7B, 0xC8, 0x9B,
+-  0xC6, 0x53, 0x88, 0x26, 0x20, 0x88, 0x73, 0xB6, 0x13, 0xB8, 0xF0, 0x4B,
+-  0x00, 0x85, 0xF3, 0xDD, 0x07, 0x50, 0xEB, 0x20, 0xC4, 0x38, 0x0E, 0x98,
+-  0xAD, 0x4E, 0x49, 0x2C, 0xD7, 0x65, 0xA5, 0x19, 0x0E, 0x59, 0x01, 0xEC,
+-  0x7E, 0x75, 0x89, 0x69, 0x2E, 0x63, 0x76, 0x85, 0x46, 0x8D, 0xA0, 0x8C,
+-  0x33, 0x1D, 0x82, 0x8C, 0x03, 0xEA, 0x69, 0x88, 0x35, 0xA1, 0x42, 0xBD,
+-  0x21, 0xED, 0x8D, 0xBC, 0xBC, 0xDB, 0x30, 0xFF, 0x86, 0xF0, 0x5B, 0xDC,
+-  0xE3, 0xE2, 0xE8, 0x0A, 0x0A, 0x29, 0x94, 0x80
+-};
 +	ctbuf = OPENSSL_malloc(RSA_size(rsa));
 +	if (!ctbuf)
 +		goto err;
-+
+ 
+-static const unsigned char kat_RSA_X931_SHA384[] = {
+-  0x5C, 0x7D, 0x96, 0x35, 0xEC, 0x7E, 0x11, 0x38, 0xBB, 0x7B, 0xEC, 0x7B,
+-  0xF2, 0x82, 0x8E, 0x99, 0xBD, 0xEF, 0xD8, 0xAE, 0xD7, 0x39, 0x37, 0xCB,
+-  0xE6, 0x4F, 0x5E, 0x0A, 0x13, 0xE4, 0x2E, 0x40, 0xB9, 0xBE, 0x2E, 0xE3,
+-  0xEF, 0x78, 0x83, 0x18, 0x44, 0x35, 0x9C, 0x8E, 0xD7, 0x4A, 0x63, 0xF6,
+-  0x57, 0xC2, 0xB0, 0x08, 0x51, 0x73, 0xCF, 0xCA, 0x99, 0x66, 0xEE, 0x31,
+-  0xD8, 0x69, 0xE9, 0xAB, 0x13, 0x27, 0x7B, 0x41, 0x1E, 0x6D, 0x8D, 0xF1,
+-  0x3E, 0x9C, 0x35, 0x95, 0x58, 0xDD, 0x2B, 0xD5, 0xA0, 0x60, 0x41, 0x79,
+-  0x24, 0x22, 0xE4, 0xB7, 0xBF, 0x47, 0x53, 0xF6, 0x34, 0xD5, 0x7C, 0xFF,
+-  0x0E, 0x09, 0xEE, 0x2E, 0xE2, 0x37, 0xB9, 0xDE, 0xC5, 0x12, 0x44, 0x35,
+-  0xEF, 0x01, 0xE6, 0x5E, 0x39, 0x31, 0x2D, 0x71, 0xA5, 0xDC, 0xC6, 0x6D,
+-  0xE2, 0xCD, 0x85, 0xDB, 0x73, 0x82, 0x65, 0x28
+-};
 +	len = RSA_public_encrypt(ptlen, plaintext, ctbuf, rsa, RSA_PKCS1_PADDING);
 +	if (len <= 0)
 +		goto err;
 +	/* Check ciphertext doesn't match plaintext */
 +	if (len >= ptlen && !memcmp(plaintext, ctbuf, ptlen))
 +		goto err;
-+
+ 
+-static const unsigned char kat_RSA_X931_SHA512[] = {
+-  0xA6, 0x65, 0xA2, 0x77, 0x4F, 0xB3, 0x86, 0xCB, 0x64, 0x3A, 0xC1, 0x63,
+-  0xFC, 0xA1, 0xAA, 0xCB, 0x9B, 0x79, 0xDD, 0x4B, 0xE1, 0xD9, 0xDA, 0xAC,
+-  0xE7, 0x47, 0x09, 0xB2, 0x11, 0x4B, 0x8A, 0xAA, 0x05, 0x9E, 0x77, 0xD7,
+-  0x3A, 0xBD, 0x5E, 0x53, 0x09, 0x4A, 0xE6, 0x0F, 0x5E, 0xF9, 0x14, 0x28,
+-  0xA0, 0x99, 0x74, 0x64, 0x70, 0x4E, 0xF2, 0xE3, 0xFA, 0xC7, 0xF8, 0xC5,
+-  0x6E, 0x2B, 0x79, 0x96, 0x0D, 0x0C, 0xC8, 0x10, 0x34, 0x53, 0xD2, 0xAF,
+-  0x17, 0x0E, 0xE0, 0xBF, 0x79, 0xF6, 0x04, 0x72, 0x10, 0xE0, 0xF6, 0xD0,
+-  0xCE, 0x8A, 0x6F, 0xA1, 0x95, 0x89, 0xBF, 0x58, 0x8F, 0x46, 0x5F, 0x09,
+-  0x9F, 0x09, 0xCA, 0x84, 0x15, 0x85, 0xE0, 0xED, 0x04, 0x2D, 0xFB, 0x7C,
+-  0x36, 0x35, 0x21, 0x31, 0xC3, 0xFD, 0x92, 0x42, 0x11, 0x30, 0x71, 0x1B,
+-  0x60, 0x83, 0x18, 0x88, 0xA3, 0xF5, 0x59, 0xC3
+-};
 +	ptbuf = OPENSSL_malloc(RSA_size(rsa));
 +	if (!ptbuf)
 +		goto err;
@@ -431,7 +1037,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
 +		goto err;
 +
 +	ret = 1;
-+
+ 
 +	err:
 +	if (ctbuf)
 +		OPENSSL_free(ctbuf);
@@ -442,7 +1048,7 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
  
  int FIPS_selftest_rsa()
  	{
-@@ -353,7 +389,7 @@ int FIPS_selftest_rsa()
+@@ -353,7 +487,7 @@ int FIPS_selftest_rsa()
  	if ((pk=EVP_PKEY_new()) == NULL)
  		goto err;
  
@@ -451,13 +1057,35 @@ diff -up openssl-1.0.1e/crypto/fips/fips_rsa_selftest.c.fips-reqs openssl-1.0.1e
  
  	if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1,
  				kat_RSA_SHA1, sizeof(kat_RSA_SHA1),
-@@ -430,13 +466,15 @@ int FIPS_selftest_rsa()
- 			"RSA SHA512 X931"))
+@@ -407,36 +541,15 @@ int FIPS_selftest_rsa()
+ 				"RSA SHA512 PSS"))
  		goto err;
  
+-
+-	if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1,
+-			kat_RSA_X931_SHA1, sizeof(kat_RSA_X931_SHA1),
+-			EVP_sha1(), EVP_MD_CTX_FLAG_PAD_X931,
+-			"RSA SHA1 X931"))
+-		goto err;
+-	/* NB: SHA224 not supported in X9.31 */
+-	if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1,
+-			kat_RSA_X931_SHA256, sizeof(kat_RSA_X931_SHA256),
+-			EVP_sha256(), EVP_MD_CTX_FLAG_PAD_X931,
+-			"RSA SHA256 X931"))
+-		goto err;
+-	if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1,
+-			kat_RSA_X931_SHA384, sizeof(kat_RSA_X931_SHA384),
+-			EVP_sha384(), EVP_MD_CTX_FLAG_PAD_X931,
+-			"RSA SHA384 X931"))
+-		goto err;
+-	if (!fips_pkey_signature_test(pk, kat_tbs, sizeof(kat_tbs) - 1,
+-			kat_RSA_X931_SHA512, sizeof(kat_RSA_X931_SHA512),
+-			EVP_sha512(), EVP_MD_CTX_FLAG_PAD_X931,
+-			"RSA SHA512 X931"))
 +	if (!fips_rsa_encrypt_test(key, kat_tbs, sizeof(kat_tbs) - 1))
-+		goto err;
+ 		goto err;
  
+-
  	ret = 1;
  
  	err:
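
The interleaved + lines above splice a new pairwise consistency test into the space
left by the removed X9.31 vectors, and FIPS_selftest_rsa() now calls it once,
against the raw RSA key, in place of the four removed X9.31 KATs. Reassembled for
readability it reads roughly as below; the private-decrypt step and the ptbuf
cleanup fall in context elided between hunks, so those lines are an assumption
rather than a quote:

    #include <string.h>
    #include <openssl/crypto.h>
    #include <openssl/rsa.h>

    static int fips_rsa_encrypt_test(RSA *rsa, const unsigned char *plaintext, int ptlen)
        {
        unsigned char *ctbuf = NULL, *ptbuf = NULL;
        int ret = 0;
        int len;

        ctbuf = OPENSSL_malloc(RSA_size(rsa));
        if (!ctbuf)
            goto err;

        len = RSA_public_encrypt(ptlen, plaintext, ctbuf, rsa, RSA_PKCS1_PADDING);
        if (len <= 0)
            goto err;
        /* Check ciphertext doesn't match plaintext */
        if (len >= ptlen && !memcmp(plaintext, ctbuf, ptlen))
            goto err;

        ptbuf = OPENSSL_malloc(RSA_size(rsa));
        if (!ptbuf)
            goto err;
        /* assumed from the elided context: decrypt and compare round-trip */
        len = RSA_private_decrypt(len, ctbuf, ptbuf, rsa, RSA_PKCS1_PADDING);
        if (len != ptlen || memcmp(ptbuf, plaintext, ptlen))
            goto err;

        ret = 1;

        err:
        if (ctbuf)
            OPENSSL_free(ctbuf);
        if (ptbuf)
            OPENSSL_free(ptbuf);
        return ret;
        }
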
@@ -796,7 +1424,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
   * All rights reserved.
   *
   * This package is an SSL implementation written
-@@ -165,6 +166,222 @@ int RSA_generate_key_ex(RSA *rsa, int bi
+@@ -165,6 +166,236 @@ int RSA_generate_key_ex(RSA *rsa, int bi
  	return rsa_builtin_keygen(rsa, bits, e_value, cb);
  	}
  
@@ -819,7 +1447,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
 +	    	return 0;
 +	    	}
 +
-+	if (bits != 2048 && bits != 3072)
++	if ((pbits & 0xFF) || (getenv("OPENSSL_ENFORCE_MODULUS_BITS") && bits != 2048 && bits != 3072))
 +		{
 +		FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN, FIPS_R_INVALID_KEY_LENGTH);
 +		return 0;
@@ -866,6 +1494,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
 +	if (!BN_is_zero(rsa->p) && !BN_is_zero(rsa->q))
 +		test = 1;
 +
++retry:
 +	/* generate p and q */
 +	for (i = 0; i < 5 * pbits; i++)
 +		{
@@ -958,7 +1587,18 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
 +	/* calculate d */
 +	if (!BN_sub(r1,rsa->p,BN_value_one())) goto err;	/* p-1 */
 +	if (!BN_sub(r2,rsa->q,BN_value_one())) goto err;	/* q-1 */
-+	if (!BN_mul(r0,r1,r2,ctx)) goto err;	/* (p-1)(q-1) */
++	
++	if (!BN_gcd(r0, r1, r2, ctx)) goto err;
++	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
++		{
++		  pr0 = &local_r0;
++		  BN_with_flags(pr0, r0, BN_FLG_CONSTTIME);
++		}
++	else
++	  pr0 = r0;
++	if (!BN_div(r0, NULL, r1, pr0, ctx)) goto err;
++	if (!BN_mul(r0,r0,r2,ctx)) goto err;	/* lcm(p-1, q-1) */
++
 +	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
 +		{
 +		  pr0 = &local_r0;
@@ -968,6 +1608,8 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
 +	  pr0 = r0;
 +	if (!BN_mod_inverse(rsa->d,rsa->e,pr0,ctx)) goto err;	/* d */
 +
++	if (BN_num_bits(rsa->d) < pbits) goto retry; /* d is too small */
++
 +	/* set up d for correct BN_FLG_CONSTTIME flag */
 +	if (!(rsa->flags & RSA_FLAG_NO_CONSTTIME))
 +		{
@@ -1019,7 +1661,7 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
  static int rsa_builtin_keygen(RSA *rsa, int bits, BIGNUM *e_value, BN_GENCB *cb)
  	{
  	BIGNUM *r0=NULL,*r1=NULL,*r2=NULL,*r3=NULL,*tmp;
-@@ -176,17 +393,7 @@ static int rsa_builtin_keygen(RSA *rsa,
+@@ -176,17 +407,12 @@ static int rsa_builtin_keygen(RSA *rsa,
  #ifdef OPENSSL_FIPS
  	if (FIPS_module_mode())
  		{
@@ -1029,16 +1671,16 @@ diff -up openssl-1.0.1e/crypto/rsa/rsa_gen.c.fips-reqs openssl-1.0.1e/crypto/rsa
 -	    	return 0;
 -	    	}
 -
--		if (bits < OPENSSL_RSA_FIPS_MIN_MODULUS_BITS)
--		    {
--		    FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN,FIPS_R_KEY_TOO_SHORT);
--		    return 0;
--		    }
+ 		if (bits < OPENSSL_RSA_FIPS_MIN_MODULUS_BITS)
+ 		    {
+ 		    FIPSerr(FIPS_F_RSA_BUILTIN_KEYGEN,FIPS_R_KEY_TOO_SHORT);
+ 		    return 0;
+ 		    }
 +		return FIPS_rsa_builtin_keygen(rsa, bits, e_value, cb);
  		}
  #endif
  
-@@ -301,17 +508,6 @@ static int rsa_builtin_keygen(RSA *rsa,
+@@ -301,17 +527,6 @@ static int rsa_builtin_keygen(RSA *rsa,
  		p = rsa->p;
  	if (!BN_mod_inverse(rsa->iqmp,rsa->q,p,ctx)) goto err;
  
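
Besides renumbering the surrounding hunks, the fips-reqs changes to rsa_gen.c above
do three substantive things: the FIPS key-size gate is relaxed so that any prime
size that is a multiple of 256 bits passes, with the strict 2048/3072 modulus check
applied only when OPENSSL_ENFORCE_MODULUS_BITS is set in the environment; the
private exponent is computed modulo lcm(p-1, q-1) instead of (p-1)(q-1); and key
generation jumps back to retry: whenever d comes out shorter than pbits. A
condensed sketch of just the new d arithmetic, with error paths trimmed and the
BN_FLG_CONSTTIME handling omitted; compute_d_sketch is an illustrative name, not
part of the patch:

    #include <openssl/bn.h>

    /* d = e^-1 mod lcm(p-1, q-1); returns 1 only when d is large enough,
     * mirroring the patch's "goto retry" when BN_num_bits(d) < pbits. */
    static int compute_d_sketch(BIGNUM *d, const BIGNUM *p, const BIGNUM *q,
                                const BIGNUM *e, int pbits, BN_CTX *ctx)
    {
        int ok = 0;
        BIGNUM *r0 = BN_new(), *r1 = BN_new(), *r2 = BN_new();

        if (!r0 || !r1 || !r2) goto err;
        if (!BN_sub(r1, p, BN_value_one())) goto err;   /* p-1 */
        if (!BN_sub(r2, q, BN_value_one())) goto err;   /* q-1 */
        if (!BN_gcd(r0, r1, r2, ctx)) goto err;         /* gcd(p-1, q-1) */
        if (!BN_div(r0, NULL, r1, r0, ctx)) goto err;   /* (p-1)/gcd */
        if (!BN_mul(r0, r0, r2, ctx)) goto err;         /* lcm(p-1, q-1) */
        if (!BN_mod_inverse(d, e, r0, ctx)) goto err;   /* d */
        ok = BN_num_bits(d) >= pbits;                   /* else: retry keygen */
    err:
        BN_free(r0); BN_free(r1); BN_free(r2);
        return ok;
    }
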
diff --git a/SOURCES/openssl-1.0.1e-ppc-asm-update.patch b/SOURCES/openssl-1.0.1e-ppc-asm-update.patch
new file mode 100644
index 0000000..91efede
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-ppc-asm-update.patch
@@ -0,0 +1,6677 @@
+diff --git a/Configure b/Configure
+index 9c803dc..5a5c2d8 100755
+--- a/Configure
++++ b/Configure
+@@ -139,8 +139,8 @@ my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes
+ my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void";
+ my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32";
+ my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64";
+-my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::";
+-my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::::";
++my $ppc64_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o vpaes-ppc.o aesp8-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o sha256p8-ppc.o sha512p8-ppc.o:::::::ghashp8-ppc.o:";
++my $ppc32_asm=$ppc64_asm;
+ my $no_asm=":::::::::::::::void";
+ 
+ # As for $BSDthreads. Idea is to maintain "collective" set of flags,
+@@ -357,6 +357,7 @@ my %table=(
+ ####
+ "linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-ppc64",	"gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
++"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
+ "linux-ia64",	"gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+@@ -462,8 +463,8 @@ my %table=(
+ 
+ #### IBM's AIX.
+ "aix3-cc",  "cc:-O -DB_ENDIAN -qmaxmem=16384::(unknown):AIX::BN_LLONG RC4_CHAR:::",
+-"aix-gcc",  "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32",
+-"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:${ppc64_asm}:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64",
++"aix-gcc",  "gcc:-O -DB_ENDIAN::-pthread:AIX::BN_LLONG RC4_CHAR:$ppc32_asm:aix32:dlfcn:aix-shared::-shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X32",
++"aix64-gcc","gcc:-maix64 -O -DB_ENDIAN::-pthread:AIX::SIXTY_FOUR_BIT_LONG RC4_CHAR:$ppc64_asm:aix64:dlfcn:aix-shared::-maix64 -shared -Wl,-G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X64",
+ # Below targets assume AIX 5. Idea is to effectively disregard $OBJECT_MODE
+ # at build time. $OBJECT_MODE is respected at ./config stage!
+ "aix-cc",   "cc:-q32 -O -DB_ENDIAN -qmaxmem=16384 -qro -qroconst::-qthreaded -D_THREAD_SAFE:AIX::BN_LLONG RC4_CHAR:${ppc32_asm}:aix32:dlfcn:aix-shared::-q32 -G:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)::-X 32",
+@@ -1525,7 +1526,7 @@ else	{
+ 	$wp_obj="wp_block.o";
+ 	}
+ $cmll_obj=$cmll_enc	unless ($cmll_obj =~ /.o$/);
+-if ($modes_obj =~ /ghash/)
++if ($modes_obj =~ /ghash\-/)
+ 	{
+ 	$cflags.=" -DGHASH_ASM";
+ 	}
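
The tightened match matters because ghashp8-ppc.o now appears in the modes column:
with the old /ghash/ pattern it too would define GHASH_ASM, which selects the
generic GHASH assembler glue; restricting the pattern to ghash- keeps that define
tied to the ghash-<arch>.o modules only. (This reading is inferred from the object
lists above; the patch itself does not spell it out.)
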
+diff --git a/config b/config
+index 88b9bc6..8b80802 100755
+--- a/config
++++ b/config
+@@ -587,13 +587,20 @@ case "$GUESSOS" in
+ 	fi
+ 	;;
+   ppc64-*-linux2)
+-	echo "WARNING! If you wish to build 64-bit library, then you have to"
+-	echo "         invoke './Configure linux-ppc64' *manually*."
+-	if [ "$TEST" = "false" -a -t 1 ]; then
+-	    echo "         You have about 5 seconds to press Ctrl-C to abort."
+-	    (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
++	if [ -z "$KERNEL_BITS" ]; then
++	    echo "WARNING! If you wish to build 64-bit library, then you have to"
++	    echo "         invoke './Configure linux-ppc64' *manually*."
++	    if [ "$TEST" = "false" -a -t 1 ]; then
++		echo "         You have about 5 seconds to press Ctrl-C to abort."
++		(trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
++	    fi
++	fi
++	if [ "$KERNEL_BITS" = "64" ]; then
++	    OUT="linux-ppc64"
++	else
++	    OUT="linux-ppc"
++	    (echo "__LP64__" | gcc -E -x c - 2>/dev/null | grep "^__LP64__" 2>&1 > /dev/null) || options="$options -m32"
+ 	fi
+-	OUT="linux-ppc"
+ 	;;
+   ppc-*-linux2) OUT="linux-ppc" ;;
+   ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;;
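
The net effect is that the word size can now be chosen non-interactively: running,
for example, KERNEL_BITS=64 ./config selects linux-ppc64 outright, any other
KERNEL_BITS value selects linux-ppc (adding -m32 where the compiler defaults to
64-bit), and the old five-second Ctrl-C prompt survives only for the interactive
default case.
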
+diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile
+index 45ede0a..847f4ee 100644
+--- a/crypto/aes/Makefile
++++ b/crypto/aes/Makefile
+@@ -71,6 +71,10 @@ aes-sparcv9.s: asm/aes-sparcv9.pl
+ 
+ aes-ppc.s:	asm/aes-ppc.pl
+ 	$(PERL) asm/aes-ppc.pl $(PERLASM_SCHEME) $@
++vpaes-ppc.s:	asm/vpaes-ppc.pl
++	$(PERL) asm/vpaes-ppc.pl $(PERLASM_SCHEME) $@
++aesp8-ppc.s:	asm/aesp8-ppc.pl
++	$(PERL) asm/aesp8-ppc.pl $(PERLASM_SCHEME) $@
+ 
+ aes-parisc.s:	asm/aes-parisc.pl
+ 	$(PERL) asm/aes-parisc.pl $(PERLASM_SCHEME) $@
+diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl
+index 7c52cbe..7a99fc3 100644
+--- a/crypto/aes/asm/aes-ppc.pl
++++ b/crypto/aes/asm/aes-ppc.pl
+@@ -45,6 +45,8 @@ if ($flavour =~ /64/) {
+ 	$PUSH	="stw";
+ } else { die "nonsense $flavour"; }
+ 
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+@@ -68,7 +70,7 @@ $key="r5";
+ $Tbl0="r3";
+ $Tbl1="r6";
+ $Tbl2="r7";
+-$Tbl3="r2";
++$Tbl3=$out;	# stay away from "r2"; $out is offloaded to stack
+ 
+ $s0="r8";
+ $s1="r9";
+@@ -76,7 +78,7 @@ $s2="r10";
+ $s3="r11";
+ 
+ $t0="r12";
+-$t1="r13";
++$t1="r0";	# stay away from "r13";
+ $t2="r14";
+ $t3="r15";
+ 
+@@ -100,9 +102,6 @@ $acc13="r29";
+ $acc14="r30";
+ $acc15="r31";
+ 
+-# stay away from TLS pointer
+-if ($SIZE_T==8)	{ die if ($t1 ne "r13");  $t1="r0";		}
+-else		{ die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0";	}
+ $mask80=$Tbl2;
+ $mask1b=$Tbl3;
+ 
+@@ -337,8 +336,7 @@ $code.=<<___;
+ 	$STU	$sp,-$FRAME($sp)
+ 	mflr	r0
+ 
+-	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
+-	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
++	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
+ 	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
+ 	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
+ 	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -365,16 +363,61 @@ $code.=<<___;
+ 	bne	Lenc_unaligned
+ 
+ Lenc_unaligned_ok:
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ 	lwz	$s0,0($inp)
+ 	lwz	$s1,4($inp)
+ 	lwz	$s2,8($inp)
+ 	lwz	$s3,12($inp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++	lwz	$t0,0($inp)
++	lwz	$t1,4($inp)
++	lwz	$t2,8($inp)
++	lwz	$t3,12($inp)
++	rotlwi	$s0,$t0,8
++	rotlwi	$s1,$t1,8
++	rotlwi	$s2,$t2,8
++	rotlwi	$s3,$t3,8
++	rlwimi	$s0,$t0,24,0,7
++	rlwimi	$s1,$t1,24,0,7
++	rlwimi	$s2,$t2,24,0,7
++	rlwimi	$s3,$t3,24,0,7
++	rlwimi	$s0,$t0,24,16,23
++	rlwimi	$s1,$t1,24,16,23
++	rlwimi	$s2,$t2,24,16,23
++	rlwimi	$s3,$t3,24,16,23
++___
++$code.=<<___;
+ 	bl	LAES_Te
+ 	bl	Lppc_AES_encrypt_compact
++	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++	rotlwi	$t0,$s0,8
++	rotlwi	$t1,$s1,8
++	rotlwi	$t2,$s2,8
++	rotlwi	$t3,$s3,8
++	rlwimi	$t0,$s0,24,0,7
++	rlwimi	$t1,$s1,24,0,7
++	rlwimi	$t2,$s2,24,0,7
++	rlwimi	$t3,$s3,24,0,7
++	rlwimi	$t0,$s0,24,16,23
++	rlwimi	$t1,$s1,24,16,23
++	rlwimi	$t2,$s2,24,16,23
++	rlwimi	$t3,$s3,24,16,23
++	stw	$t0,0($out)
++	stw	$t1,4($out)
++	stw	$t2,8($out)
++	stw	$t3,12($out)
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ 	stw	$s0,0($out)
+ 	stw	$s1,4($out)
+ 	stw	$s2,8($out)
+ 	stw	$s3,12($out)
++___
++$code.=<<___;
+ 	b	Lenc_done
+ 
+ Lenc_unaligned:
+@@ -417,6 +460,7 @@ Lenc_xpage:
+ 
+ 	bl	LAES_Te
+ 	bl	Lppc_AES_encrypt_compact
++	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
+ 
+ 	extrwi	$acc00,$s0,8,0
+ 	extrwi	$acc01,$s0,8,8
+@@ -449,8 +493,6 @@ Lenc_xpage:
+ 
+ Lenc_done:
+ 	$POP	r0,`$FRAME+$LRSAVE`($sp)
+-	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
+-	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
+ 	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
+ 	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
+ 	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -764,6 +806,7 @@ Lenc_compact_done:
+ 	blr
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,0,0
++.size	.AES_encrypt,.-.AES_encrypt
+ 
+ .globl	.AES_decrypt
+ .align	7
+@@ -771,8 +814,7 @@ Lenc_compact_done:
+ 	$STU	$sp,-$FRAME($sp)
+ 	mflr	r0
+ 
+-	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
+-	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
++	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
+ 	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
+ 	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
+ 	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -799,16 +841,61 @@ Lenc_compact_done:
+ 	bne	Ldec_unaligned
+ 
+ Ldec_unaligned_ok:
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ 	lwz	$s0,0($inp)
+ 	lwz	$s1,4($inp)
+ 	lwz	$s2,8($inp)
+ 	lwz	$s3,12($inp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++	lwz	$t0,0($inp)
++	lwz	$t1,4($inp)
++	lwz	$t2,8($inp)
++	lwz	$t3,12($inp)
++	rotlwi	$s0,$t0,8
++	rotlwi	$s1,$t1,8
++	rotlwi	$s2,$t2,8
++	rotlwi	$s3,$t3,8
++	rlwimi	$s0,$t0,24,0,7
++	rlwimi	$s1,$t1,24,0,7
++	rlwimi	$s2,$t2,24,0,7
++	rlwimi	$s3,$t3,24,0,7
++	rlwimi	$s0,$t0,24,16,23
++	rlwimi	$s1,$t1,24,16,23
++	rlwimi	$s2,$t2,24,16,23
++	rlwimi	$s3,$t3,24,16,23
++___
++$code.=<<___;
+ 	bl	LAES_Td
+ 	bl	Lppc_AES_decrypt_compact
++	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++	rotlwi	$t0,$s0,8
++	rotlwi	$t1,$s1,8
++	rotlwi	$t2,$s2,8
++	rotlwi	$t3,$s3,8
++	rlwimi	$t0,$s0,24,0,7
++	rlwimi	$t1,$s1,24,0,7
++	rlwimi	$t2,$s2,24,0,7
++	rlwimi	$t3,$s3,24,0,7
++	rlwimi	$t0,$s0,24,16,23
++	rlwimi	$t1,$s1,24,16,23
++	rlwimi	$t2,$s2,24,16,23
++	rlwimi	$t3,$s3,24,16,23
++	stw	$t0,0($out)
++	stw	$t1,4($out)
++	stw	$t2,8($out)
++	stw	$t3,12($out)
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
+ 	stw	$s0,0($out)
+ 	stw	$s1,4($out)
+ 	stw	$s2,8($out)
+ 	stw	$s3,12($out)
++___
++$code.=<<___;
+ 	b	Ldec_done
+ 
+ Ldec_unaligned:
+@@ -851,6 +938,7 @@ Ldec_xpage:
+ 
+ 	bl	LAES_Td
+ 	bl	Lppc_AES_decrypt_compact
++	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
+ 
+ 	extrwi	$acc00,$s0,8,0
+ 	extrwi	$acc01,$s0,8,8
+@@ -883,8 +971,6 @@ Ldec_xpage:
+ 
+ Ldec_done:
+ 	$POP	r0,`$FRAME+$LRSAVE`($sp)
+-	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
+-	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
+ 	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
+ 	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
+ 	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -1355,6 +1441,7 @@ Ldec_compact_done:
+ 	blr
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,0,0
++.size	.AES_decrypt,.-.AES_decrypt
+ 
+ .asciz	"AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
+ .align	7
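
Two things change throughout aes-ppc.pl above: the code stays away from r2 (the
TOC pointer) and r13 (the TLS pointer), spilling $out to the stack instead, and on
little-endian flavours each 32-bit word is byte-swapped around the encrypt/decrypt
cores so the scalar code keeps operating on its big-endian word layout. The swap is
the rotlwi-plus-two-rlwimi sequence; a plain-C sketch of what those three
instructions compute (illustrative, not part of the patch):

    #include <stdint.h>

    static uint32_t rotl32(uint32_t x, int n)
    {
        return (x << n) | (x >> (32 - n));
    }

    /* PPC numbers bits from the MSB, so rlwimi ...,24,0,7 inserts the top
     * byte and rlwimi ...,24,16,23 the third byte of rotl(t,24) into s. */
    static uint32_t ppc_bswap32(uint32_t t)
    {
        uint32_t s = rotl32(t, 8);                  /* rotlwi s,t,8        */
        uint32_t r = rotl32(t, 24);
        s = (s & 0x00FFFFFFu) | (r & 0xFF000000u);  /* rlwimi s,t,24,0,7   */
        s = (s & 0xFFFF00FFu) | (r & 0x0000FF00u);  /* rlwimi s,t,24,16,23 */
        return s;                                   /* == byte swap of t   */
    }
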
+diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
+new file mode 100755
+index 0000000..3ee8979
+--- /dev/null
++++ b/crypto/aes/asm/aesp8-ppc.pl
+@@ -0,0 +1,1940 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# This module implements support for AES instructions as per PowerISA
++# specification version 2.07, first implemented by the POWER8 processor.
++# The module is endian-agnostic in the sense that it supports both big-
++# and little-endian cases. Data alignment in parallelizable modes is
++# handled with VSX loads and stores, which implies the MSR.VSX flag being
++# set. It should also be noted that the ISA specification doesn't prohibit
++# alignment exceptions for these instructions on page boundaries.
++# Initially alignment was handled in a pure AltiVec/VMX way [when data
++# is aligned programmatically, which in turn guarantees exception-
++# free execution], but that turned out to hamper performance when vcipher
++# instructions are interleaved. It's reckoned that the eventual
++# misalignment penalties at page boundaries are on average lower
++# than the additional overhead of the pure AltiVec approach.
++
++$flavour = shift;
++
++if ($flavour =~ /64/) {
++	$SIZE_T	=8;
++	$LRSAVE	=2*$SIZE_T;
++	$STU	="stdu";
++	$POP	="ld";
++	$PUSH	="std";
++	$UCMP	="cmpld";
++	$SHL	="sldi";
++} elsif ($flavour =~ /32/) {
++	$SIZE_T	=4;
++	$LRSAVE	=$SIZE_T;
++	$STU	="stwu";
++	$POP	="lwz";
++	$PUSH	="stw";
++	$UCMP	="cmplw";
++	$SHL	="slwi";
++} else { die "nonsense $flavour"; }
++
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
++
++$FRAME=8*$SIZE_T;
++$prefix="aes_p8";
++
++$sp="r1";
++$vrsave="r12";
++
++#########################################################################
++{{{	# Key setup procedures						#
++my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
++my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
++my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
++
++$code.=<<___;
++.machine	"any"
++
++.text
++
++.align	7
++rcon:
++.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
++.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
++.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
++.long	0,0,0,0						?asis
++Lconsts:
++	mflr	r0
++	bcl	20,31,\$+4
++	mflr	$ptr	 #vvvvv distance between . and rcon
++	addi	$ptr,$ptr,-0x48
++	mtlr	r0
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
++
++.globl	.${prefix}_set_encrypt_key
++.align	5
++.${prefix}_set_encrypt_key:
++Lset_encrypt_key:
++	mflr		r11
++	$PUSH		r11,$LRSAVE($sp)
++
++	li		$ptr,-1
++	${UCMP}i	$inp,0
++	beq-		Lenc_key_abort		# if ($inp==0) return -1;
++	${UCMP}i	$out,0
++	beq-		Lenc_key_abort		# if ($out==0) return -1;
++	li		$ptr,-2
++	cmpwi		$bits,128
++	blt-		Lenc_key_abort
++	cmpwi		$bits,256
++	bgt-		Lenc_key_abort
++	andi.		r0,$bits,0x3f
++	bne-		Lenc_key_abort
++
++	lis		r0,0xfff0
++	mfspr		$vrsave,256
++	mtspr		256,r0
++
++	bl		Lconsts
++	mtlr		r11
++
++	neg		r9,$inp
++	lvx		$in0,0,$inp
++	addi		$inp,$inp,15		# 15 is not typo
++	addi		$inp,$inp,15		# 15 is not a typo
++	li		r8,0x20
++	cmpwi		$bits,192
++	lvx		$in1,0,$inp
++	le?vspltisb	$mask,0x0f		# borrow $mask
++	lvx		$rcon,0,$ptr
++	le?vxor		$key,$key,$mask		# adjust for byte swap
++	lvx		$mask,r8,$ptr
++	addi		$ptr,$ptr,0x10
++	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
++	li		$cnt,8
++	vxor		$zero,$zero,$zero
++	mtctr		$cnt
++
++	?lvsr		$outperm,0,$out
++	vspltisb	$outmask,-1
++	lvx		$outhead,0,$out
++	?vperm		$outmask,$zero,$outmask,$outperm
++
++	blt		Loop128
++	addi		$inp,$inp,8
++	beq		L192
++	addi		$inp,$inp,8
++	b		L256
++
++.align	4
++Loop128:
++	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
++	vsldoi		$tmp,$zero,$in0,12	# >>32
++	 vperm		$outtail,$in0,$in0,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	vcipherlast	$key,$key,$rcon
++	 stvx		$stage,0,$out
++	 addi		$out,$out,16
++
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++	 vadduwm	$rcon,$rcon,$rcon
++	vxor		$in0,$in0,$key
++	bdnz		Loop128
++
++	lvx		$rcon,0,$ptr		# last two round keys
++
++	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
++	vsldoi		$tmp,$zero,$in0,12	# >>32
++	 vperm		$outtail,$in0,$in0,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	vcipherlast	$key,$key,$rcon
++	 stvx		$stage,0,$out
++	 addi		$out,$out,16
++
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++	 vadduwm	$rcon,$rcon,$rcon
++	vxor		$in0,$in0,$key
++
++	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
++	vsldoi		$tmp,$zero,$in0,12	# >>32
++	 vperm		$outtail,$in0,$in0,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	vcipherlast	$key,$key,$rcon
++	 stvx		$stage,0,$out
++	 addi		$out,$out,16
++
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++	vxor		$in0,$in0,$key
++	 vperm		$outtail,$in0,$in0,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	 stvx		$stage,0,$out
++
++	addi		$inp,$out,15		# 15 is not a typo
++	addi		$out,$out,0x50
++
++	li		$rounds,10
++	b		Ldone
++
++.align	4
++L192:
++	lvx		$tmp,0,$inp
++	li		$cnt,4
++	 vperm		$outtail,$in0,$in0,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	 stvx		$stage,0,$out
++	 addi		$out,$out,16
++	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
++	vspltisb	$key,8			# borrow $key
++	mtctr		$cnt
++	vsububm		$mask,$mask,$key	# adjust the mask
++
++Loop192:
++	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
++	vsldoi		$tmp,$zero,$in0,12	# >>32
++	vcipherlast	$key,$key,$rcon
++
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++
++	 vsldoi		$stage,$zero,$in1,8
++	vspltw		$tmp,$in0,3
++	vxor		$tmp,$tmp,$in1
++	vsldoi		$in1,$zero,$in1,12	# >>32
++	 vadduwm	$rcon,$rcon,$rcon
++	vxor		$in1,$in1,$tmp
++	vxor		$in0,$in0,$key
++	vxor		$in1,$in1,$key
++	 vsldoi		$stage,$stage,$in0,8
++
++	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
++	vsldoi		$tmp,$zero,$in0,12	# >>32
++	 vperm		$outtail,$stage,$stage,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	vcipherlast	$key,$key,$rcon
++	 stvx		$stage,0,$out
++	 addi		$out,$out,16
++
++	 vsldoi		$stage,$in0,$in1,8
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	 vperm		$outtail,$stage,$stage,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++	 stvx		$stage,0,$out
++	 addi		$out,$out,16
++
++	vspltw		$tmp,$in0,3
++	vxor		$tmp,$tmp,$in1
++	vsldoi		$in1,$zero,$in1,12	# >>32
++	 vadduwm	$rcon,$rcon,$rcon
++	vxor		$in1,$in1,$tmp
++	vxor		$in0,$in0,$key
++	vxor		$in1,$in1,$key
++	 vperm		$outtail,$in0,$in0,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	 stvx		$stage,0,$out
++	 addi		$inp,$out,15		# 15 is not a typo
++	 addi		$out,$out,16
++	bdnz		Loop192
++
++	li		$rounds,12
++	addi		$out,$out,0x20
++	b		Ldone
++
++.align	4
++L256:
++	lvx		$tmp,0,$inp
++	li		$cnt,7
++	li		$rounds,14
++	 vperm		$outtail,$in0,$in0,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	 stvx		$stage,0,$out
++	 addi		$out,$out,16
++	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
++	mtctr		$cnt
++
++Loop256:
++	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
++	vsldoi		$tmp,$zero,$in0,12	# >>32
++	 vperm		$outtail,$in1,$in1,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	vcipherlast	$key,$key,$rcon
++	 stvx		$stage,0,$out
++	 addi		$out,$out,16
++
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in0,$in0,$tmp
++	 vadduwm	$rcon,$rcon,$rcon
++	vxor		$in0,$in0,$key
++	 vperm		$outtail,$in0,$in0,$outperm	# rotate
++	 vsel		$stage,$outhead,$outtail,$outmask
++	 vmr		$outhead,$outtail
++	 stvx		$stage,0,$out
++	 addi		$inp,$out,15		# 15 is not a typo
++	 addi		$out,$out,16
++	bdz		Ldone
++
++	vspltw		$key,$in0,3		# just splat
++	vsldoi		$tmp,$zero,$in1,12	# >>32
++	vsbox		$key,$key
++
++	vxor		$in1,$in1,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in1,$in1,$tmp
++	vsldoi		$tmp,$zero,$tmp,12	# >>32
++	vxor		$in1,$in1,$tmp
++
++	vxor		$in1,$in1,$key
++	b		Loop256
++
++.align	4
++Ldone:
++	lvx		$in1,0,$inp		# redundant in aligned case
++	vsel		$in1,$outhead,$in1,$outmask
++	stvx		$in1,0,$inp
++	li		$ptr,0
++	mtspr		256,$vrsave
++	stw		$rounds,0($out)
++
++Lenc_key_abort:
++	mr		r3,$ptr
++	blr
++	.long		0
++	.byte		0,12,0x14,1,0,0,3,0
++	.long		0
++.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
++
++.globl	.${prefix}_set_decrypt_key
++.align	5
++.${prefix}_set_decrypt_key:
++	$STU		$sp,-$FRAME($sp)
++	mflr		r10
++	$PUSH		r10,$FRAME+$LRSAVE($sp)
++	bl		Lset_encrypt_key
++	mtlr		r10
++
++	cmpwi		r3,0
++	bne-		Ldec_key_abort
++
++	slwi		$cnt,$rounds,4
++	subi		$inp,$out,240		# first round key
++	srwi		$rounds,$rounds,1
++	add		$out,$inp,$cnt		# last round key
++	mtctr		$rounds
++
++Ldeckey:
++	lwz		r0, 0($inp)
++	lwz		r6, 4($inp)
++	lwz		r7, 8($inp)
++	lwz		r8, 12($inp)
++	addi		$inp,$inp,16
++	lwz		r9, 0($out)
++	lwz		r10,4($out)
++	lwz		r11,8($out)
++	lwz		r12,12($out)
++	stw		r0, 0($out)
++	stw		r6, 4($out)
++	stw		r7, 8($out)
++	stw		r8, 12($out)
++	subi		$out,$out,16
++	stw		r9, -16($inp)
++	stw		r10,-12($inp)
++	stw		r11,-8($inp)
++	stw		r12,-4($inp)
++	bdnz		Ldeckey
++
++	xor		r3,r3,r3		# return value
++Ldec_key_abort:
++	addi		$sp,$sp,$FRAME
++	blr
++	.long		0
++	.byte		0,12,4,1,0x80,0,3,0
++	.long		0
++.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
++___
++}}}
++#########################################################################
++{{{	# Single block en- and decrypt procedures			#
++sub gen_block () {
++my $dir = shift;
++my $n   = $dir eq "de" ? "n" : "";
++my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
++
++$code.=<<___;
++.globl	.${prefix}_${dir}crypt
++.align	5
++.${prefix}_${dir}crypt:
++	lwz		$rounds,240($key)
++	lis		r0,0xfc00
++	mfspr		$vrsave,256
++	li		$idx,15			# 15 is not a typo
++	mtspr		256,r0
++
++	lvx		v0,0,$inp
++	neg		r11,$out
++	lvx		v1,$idx,$inp
++	lvsl		v2,0,$inp		# inpperm
++	le?vspltisb	v4,0x0f
++	?lvsl		v3,0,r11		# outperm
++	le?vxor		v2,v2,v4
++	li		$idx,16
++	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
++	lvx		v1,0,$key
++	?lvsl		v5,0,$key		# keyperm
++	srwi		$rounds,$rounds,1
++	lvx		v2,$idx,$key
++	addi		$idx,$idx,16
++	subi		$rounds,$rounds,1
++	?vperm		v1,v1,v2,v5		# align round key
++
++	vxor		v0,v0,v1
++	lvx		v1,$idx,$key
++	addi		$idx,$idx,16
++	mtctr		$rounds
++
++Loop_${dir}c:
++	?vperm		v2,v2,v1,v5
++	v${n}cipher	v0,v0,v2
++	lvx		v2,$idx,$key
++	addi		$idx,$idx,16
++	?vperm		v1,v1,v2,v5
++	v${n}cipher	v0,v0,v1
++	lvx		v1,$idx,$key
++	addi		$idx,$idx,16
++	bdnz		Loop_${dir}c
++
++	?vperm		v2,v2,v1,v5
++	v${n}cipher	v0,v0,v2
++	lvx		v2,$idx,$key
++	?vperm		v1,v1,v2,v5
++	v${n}cipherlast	v0,v0,v1
++
++	vspltisb	v2,-1
++	vxor		v1,v1,v1
++	li		$idx,15			# 15 is not a typo
++	?vperm		v2,v1,v2,v3		# outmask
++	le?vxor		v3,v3,v4
++	lvx		v1,0,$out		# outhead
++	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
++	vsel		v1,v1,v0,v2
++	lvx		v4,$idx,$out
++	stvx		v1,0,$out
++	vsel		v0,v0,v4,v2
++	stvx		v0,$idx,$out
++
++	mtspr		256,$vrsave
++	blr
++	.long		0
++	.byte		0,12,0x14,0,0,0,3,0
++	.long		0
++.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
++___
++}
++&gen_block("en");
++&gen_block("de");
++}}}
++#########################################################################
++{{{	# CBC en- and decrypt procedures				#
++my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
++my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
++						map("v$_",(4..10));
++$code.=<<___;
++.globl	.${prefix}_cbc_encrypt
++.align	5
++.${prefix}_cbc_encrypt:
++	${UCMP}i	$len,16
++	bltlr-
++
++	cmpwi		$enc,0			# test direction
++	lis		r0,0xffe0
++	mfspr		$vrsave,256
++	mtspr		256,r0
++
++	li		$idx,15
++	vxor		$rndkey0,$rndkey0,$rndkey0
++	le?vspltisb	$tmp,0x0f
++
++	lvx		$ivec,0,$ivp		# load [unaligned] iv
++	lvsl		$inpperm,0,$ivp
++	lvx		$inptail,$idx,$ivp
++	le?vxor		$inpperm,$inpperm,$tmp
++	vperm		$ivec,$ivec,$inptail,$inpperm
++
++	neg		r11,$inp
++	?lvsl		$keyperm,0,$key		# prepare for unaligned key
++	lwz		$rounds,240($key)
++
++	lvsr		$inpperm,0,r11		# prepare for unaligned load
++	lvx		$inptail,0,$inp
++	addi		$inp,$inp,15		# 15 is not a typo
++	le?vxor		$inpperm,$inpperm,$tmp
++
++	?lvsr		$outperm,0,$out		# prepare for unaligned store
++	vspltisb	$outmask,-1
++	lvx		$outhead,0,$out
++	?vperm		$outmask,$rndkey0,$outmask,$outperm
++	le?vxor		$outperm,$outperm,$tmp
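++	# classic AltiVec unaligned-store setup: $outhead carries the bytes
++	# already present at the destination, and every store below
++	# vsel-merges freshly rotated data into them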
++
++	srwi		$rounds,$rounds,1
++	li		$idx,16
++	subi		$rounds,$rounds,1
++	beq		Lcbc_dec
++
++Lcbc_enc:
++	vmr		$inout,$inptail
++	lvx		$inptail,0,$inp
++	addi		$inp,$inp,16
++	mtctr		$rounds
++	subi		$len,$len,16		# len-=16
++
++	lvx		$rndkey0,0,$key
++	 vperm		$inout,$inout,$inptail,$inpperm
++	lvx		$rndkey1,$idx,$key
++	addi		$idx,$idx,16
++	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
++	vxor		$inout,$inout,$rndkey0
++	lvx		$rndkey0,$idx,$key
++	addi		$idx,$idx,16
++	vxor		$inout,$inout,$ivec
++
++Loop_cbc_enc:
++	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
++	vcipher		$inout,$inout,$rndkey1
++	lvx		$rndkey1,$idx,$key
++	addi		$idx,$idx,16
++	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
++	vcipher		$inout,$inout,$rndkey0
++	lvx		$rndkey0,$idx,$key
++	addi		$idx,$idx,16
++	bdnz		Loop_cbc_enc
++
++	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
++	vcipher		$inout,$inout,$rndkey1
++	lvx		$rndkey1,$idx,$key
++	li		$idx,16
++	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
++	vcipherlast	$ivec,$inout,$rndkey0
++	${UCMP}i	$len,16
++
++	vperm		$tmp,$ivec,$ivec,$outperm
++	vsel		$inout,$outhead,$tmp,$outmask
++	vmr		$outhead,$tmp
++	stvx		$inout,0,$out
++	addi		$out,$out,16
++	bge		Lcbc_enc
++
++	b		Lcbc_done
++
++.align	4
++Lcbc_dec:
++	${UCMP}i	$len,128
++	bge		_aesp8_cbc_decrypt8x
++	vmr		$tmp,$inptail
++	lvx		$inptail,0,$inp
++	addi		$inp,$inp,16
++	mtctr		$rounds
++	subi		$len,$len,16		# len-=16
++
++	lvx		$rndkey0,0,$key
++	 vperm		$tmp,$tmp,$inptail,$inpperm
++	lvx		$rndkey1,$idx,$key
++	addi		$idx,$idx,16
++	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
++	vxor		$inout,$tmp,$rndkey0
++	lvx		$rndkey0,$idx,$key
++	addi		$idx,$idx,16
++
++Loop_cbc_dec:
++	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
++	vncipher	$inout,$inout,$rndkey1
++	lvx		$rndkey1,$idx,$key
++	addi		$idx,$idx,16
++	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
++	vncipher	$inout,$inout,$rndkey0
++	lvx		$rndkey0,$idx,$key
++	addi		$idx,$idx,16
++	bdnz		Loop_cbc_dec
++
++	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
++	vncipher	$inout,$inout,$rndkey1
++	lvx		$rndkey1,$idx,$key
++	li		$idx,16
++	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
++	vncipherlast	$inout,$inout,$rndkey0
++	${UCMP}i	$len,16
++
++	vxor		$inout,$inout,$ivec
++	vmr		$ivec,$tmp
++	vperm		$tmp,$inout,$inout,$outperm
++	vsel		$inout,$outhead,$tmp,$outmask
++	vmr		$outhead,$tmp
++	stvx		$inout,0,$out
++	addi		$out,$out,16
++	bge		Lcbc_dec
++
++Lcbc_done:
++	addi		$out,$out,-1
++	lvx		$inout,0,$out		# redundant in aligned case
++	vsel		$inout,$outhead,$inout,$outmask
++	stvx		$inout,0,$out
++
++	neg		$enc,$ivp		# write [unaligned] iv
++	li		$idx,15			# 15 is not a typo
++	vxor		$rndkey0,$rndkey0,$rndkey0
++	vspltisb	$outmask,-1
++	le?vspltisb	$tmp,0x0f
++	?lvsl		$outperm,0,$enc
++	?vperm		$outmask,$rndkey0,$outmask,$outperm
++	le?vxor		$outperm,$outperm,$tmp
++	lvx		$outhead,0,$ivp
++	vperm		$ivec,$ivec,$ivec,$outperm
++	vsel		$inout,$outhead,$ivec,$outmask
++	lvx		$inptail,$idx,$ivp
++	stvx		$inout,0,$ivp
++	vsel		$inout,$ivec,$inptail,$outmask
++	stvx		$inout,$idx,$ivp
++
++	mtspr		256,$vrsave
++	blr
++	.long		0
++	.byte		0,12,0x14,0,0,0,6,0
++	.long		0
++___
++#########################################################################
++{{	# Optimized CBC decrypt procedure				#
++my $key_="r11";
++my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
++my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
++			# v26-v31 last 6 round keys
++my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
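++# Eight blocks travel through the rounds together so each vncipher's
++# latency hides behind the other seven; round keys that do not fit in
++# v24-v31 are pre-permuted onto the stack and streamed back through the
++# rotating v24/v25 pair.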
++
++$code.=<<___;
++.align	5
++_aesp8_cbc_decrypt8x:
++	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
++	li		r10,`$FRAME+8*16+15`
++	li		r11,`$FRAME+8*16+31`
++	stvx		v20,r10,$sp		# ABI says so
++	addi		r10,r10,32
++	stvx		v21,r11,$sp
++	addi		r11,r11,32
++	stvx		v22,r10,$sp
++	addi		r10,r10,32
++	stvx		v23,r11,$sp
++	addi		r11,r11,32
++	stvx		v24,r10,$sp
++	addi		r10,r10,32
++	stvx		v25,r11,$sp
++	addi		r11,r11,32
++	stvx		v26,r10,$sp
++	addi		r10,r10,32
++	stvx		v27,r11,$sp
++	addi		r11,r11,32
++	stvx		v28,r10,$sp
++	addi		r10,r10,32
++	stvx		v29,r11,$sp
++	addi		r11,r11,32
++	stvx		v30,r10,$sp
++	stvx		v31,r11,$sp
++	li		r0,-1
++	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
++	li		$x10,0x10
++	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++	li		$x20,0x20
++	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++	li		$x30,0x30
++	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++	li		$x40,0x40
++	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++	li		$x50,0x50
++	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++	li		$x60,0x60
++	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++	li		$x70,0x70
++	mtspr		256,r0
++
++	subi		$rounds,$rounds,3	# -4 in total
++	subi		$len,$len,128		# bias
++
++	lvx		$rndkey0,$x00,$key	# load key schedule
++	lvx		v30,$x10,$key
++	addi		$key,$key,0x20
++	lvx		v31,$x00,$key
++	?vperm		$rndkey0,$rndkey0,v30,$keyperm
++	addi		$key_,$sp,$FRAME+15
++	mtctr		$rounds
++
++Load_cbc_dec_key:
++	?vperm		v24,v30,v31,$keyperm
++	lvx		v30,$x10,$key
++	addi		$key,$key,0x20
++	stvx		v24,$x00,$key_		# off-load round[1]
++	?vperm		v25,v31,v30,$keyperm
++	lvx		v31,$x00,$key
++	stvx		v25,$x10,$key_		# off-load round[2]
++	addi		$key_,$key_,0x20
++	bdnz		Load_cbc_dec_key
++
++	lvx		v26,$x10,$key
++	?vperm		v24,v30,v31,$keyperm
++	lvx		v27,$x20,$key
++	stvx		v24,$x00,$key_		# off-load round[3]
++	?vperm		v25,v31,v26,$keyperm
++	lvx		v28,$x30,$key
++	stvx		v25,$x10,$key_		# off-load round[4]
++	addi		$key_,$sp,$FRAME+15	# rewind $key_
++	?vperm		v26,v26,v27,$keyperm
++	lvx		v29,$x40,$key
++	?vperm		v27,v27,v28,$keyperm
++	lvx		v30,$x50,$key
++	?vperm		v28,v28,v29,$keyperm
++	lvx		v31,$x60,$key
++	?vperm		v29,v29,v30,$keyperm
++	lvx		$out0,$x70,$key		# borrow $out0
++	?vperm		v30,v30,v31,$keyperm
++	lvx		v24,$x00,$key_		# pre-load round[1]
++	?vperm		v31,v31,$out0,$keyperm
++	lvx		v25,$x10,$key_		# pre-load round[2]
++
++	#lvx		$inptail,0,$inp		# "caller" already did this
++	#addi		$inp,$inp,15		# 15 is not a typo
++	subi		$inp,$inp,15		# undo "caller"
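++						# (this path uses the
++						# unaligned lvx_u loads below,
++						# so the +15 bias added for
++						# the lvsr-based single-block
++						# path is unwanted)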
++
++	 le?li		$idx,8
++	lvx_u		$in0,$x00,$inp		# load first 8 "words"
++	 le?lvsl	$inpperm,0,$idx
++	 le?vspltisb	$tmp,0x0f
++	lvx_u		$in1,$x10,$inp
++	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
++	lvx_u		$in2,$x20,$inp
++	 le?vperm	$in0,$in0,$in0,$inpperm
++	lvx_u		$in3,$x30,$inp
++	 le?vperm	$in1,$in1,$in1,$inpperm
++	lvx_u		$in4,$x40,$inp
++	 le?vperm	$in2,$in2,$in2,$inpperm
++	vxor		$out0,$in0,$rndkey0
++	lvx_u		$in5,$x50,$inp
++	 le?vperm	$in3,$in3,$in3,$inpperm
++	vxor		$out1,$in1,$rndkey0
++	lvx_u		$in6,$x60,$inp
++	 le?vperm	$in4,$in4,$in4,$inpperm
++	vxor		$out2,$in2,$rndkey0
++	lvx_u		$in7,$x70,$inp
++	addi		$inp,$inp,0x80
++	 le?vperm	$in5,$in5,$in5,$inpperm
++	vxor		$out3,$in3,$rndkey0
++	 le?vperm	$in6,$in6,$in6,$inpperm
++	vxor		$out4,$in4,$rndkey0
++	 le?vperm	$in7,$in7,$in7,$inpperm
++	vxor		$out5,$in5,$rndkey0
++	vxor		$out6,$in6,$rndkey0
++	vxor		$out7,$in7,$rndkey0
++
++	mtctr		$rounds
++	b		Loop_cbc_dec8x
++.align	5
++Loop_cbc_dec8x:
++	vncipher	$out0,$out0,v24
++	vncipher	$out1,$out1,v24
++	vncipher	$out2,$out2,v24
++	vncipher	$out3,$out3,v24
++	vncipher	$out4,$out4,v24
++	vncipher	$out5,$out5,v24
++	vncipher	$out6,$out6,v24
++	vncipher	$out7,$out7,v24
++	lvx		v24,$x20,$key_		# round[3]
++	addi		$key_,$key_,0x20
++
++	vncipher	$out0,$out0,v25
++	vncipher	$out1,$out1,v25
++	vncipher	$out2,$out2,v25
++	vncipher	$out3,$out3,v25
++	vncipher	$out4,$out4,v25
++	vncipher	$out5,$out5,v25
++	vncipher	$out6,$out6,v25
++	vncipher	$out7,$out7,v25
++	lvx		v25,$x10,$key_		# round[4]
++	bdnz		Loop_cbc_dec8x
++
++	subic		$len,$len,128		# $len-=128
++	vncipher	$out0,$out0,v24
++	vncipher	$out1,$out1,v24
++	vncipher	$out2,$out2,v24
++	vncipher	$out3,$out3,v24
++	vncipher	$out4,$out4,v24
++	vncipher	$out5,$out5,v24
++	vncipher	$out6,$out6,v24
++	vncipher	$out7,$out7,v24
++
++	subfe.		r0,r0,r0		# borrow?-1:0
++	vncipher	$out0,$out0,v25
++	vncipher	$out1,$out1,v25
++	vncipher	$out2,$out2,v25
++	vncipher	$out3,$out3,v25
++	vncipher	$out4,$out4,v25
++	vncipher	$out5,$out5,v25
++	vncipher	$out6,$out6,v25
++	vncipher	$out7,$out7,v25
++
++	and		r0,r0,$len
++	vncipher	$out0,$out0,v26
++	vncipher	$out1,$out1,v26
++	vncipher	$out2,$out2,v26
++	vncipher	$out3,$out3,v26
++	vncipher	$out4,$out4,v26
++	vncipher	$out5,$out5,v26
++	vncipher	$out6,$out6,v26
++	vncipher	$out7,$out7,v26
++
++	add		$inp,$inp,r0		# $inp is adjusted in such a
++						# way that at exit from the
++						# loop inX-in7 are loaded
++						# with last "words"
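++						# (subic set CA unless it
++						# borrowed, subfe turned that
++						# into r0 = 0 or -1, so this
++						# add is a no-op except on the
++						# final pass, where it backs
++						# $inp up so the loads end at
++						# the input's last byte)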
++	vncipher	$out0,$out0,v27
++	vncipher	$out1,$out1,v27
++	vncipher	$out2,$out2,v27
++	vncipher	$out3,$out3,v27
++	vncipher	$out4,$out4,v27
++	vncipher	$out5,$out5,v27
++	vncipher	$out6,$out6,v27
++	vncipher	$out7,$out7,v27
++
++	addi		$key_,$sp,$FRAME+15	# rewind $key_
++	vncipher	$out0,$out0,v28
++	vncipher	$out1,$out1,v28
++	vncipher	$out2,$out2,v28
++	vncipher	$out3,$out3,v28
++	vncipher	$out4,$out4,v28
++	vncipher	$out5,$out5,v28
++	vncipher	$out6,$out6,v28
++	vncipher	$out7,$out7,v28
++	lvx		v24,$x00,$key_		# re-pre-load round[1]
++
++	vncipher	$out0,$out0,v29
++	vncipher	$out1,$out1,v29
++	vncipher	$out2,$out2,v29
++	vncipher	$out3,$out3,v29
++	vncipher	$out4,$out4,v29
++	vncipher	$out5,$out5,v29
++	vncipher	$out6,$out6,v29
++	vncipher	$out7,$out7,v29
++	lvx		v25,$x10,$key_		# re-pre-load round[2]
++
++	vncipher	$out0,$out0,v30
++	 vxor		$ivec,$ivec,v31		# xor with last round key
++	vncipher	$out1,$out1,v30
++	 vxor		$in0,$in0,v31
++	vncipher	$out2,$out2,v30
++	 vxor		$in1,$in1,v31
++	vncipher	$out3,$out3,v30
++	 vxor		$in2,$in2,v31
++	vncipher	$out4,$out4,v30
++	 vxor		$in3,$in3,v31
++	vncipher	$out5,$out5,v30
++	 vxor		$in4,$in4,v31
++	vncipher	$out6,$out6,v30
++	 vxor		$in5,$in5,v31
++	vncipher	$out7,$out7,v30
++	 vxor		$in6,$in6,v31
++
++	vncipherlast	$out0,$out0,$ivec
++	vncipherlast	$out1,$out1,$in0
++	 lvx_u		$in0,$x00,$inp		# load next input block
++	vncipherlast	$out2,$out2,$in1
++	 lvx_u		$in1,$x10,$inp
++	vncipherlast	$out3,$out3,$in2
++	 le?vperm	$in0,$in0,$in0,$inpperm
++	 lvx_u		$in2,$x20,$inp
++	vncipherlast	$out4,$out4,$in3
++	 le?vperm	$in1,$in1,$in1,$inpperm
++	 lvx_u		$in3,$x30,$inp
++	vncipherlast	$out5,$out5,$in4
++	 le?vperm	$in2,$in2,$in2,$inpperm
++	 lvx_u		$in4,$x40,$inp
++	vncipherlast	$out6,$out6,$in5
++	 le?vperm	$in3,$in3,$in3,$inpperm
++	 lvx_u		$in5,$x50,$inp
++	vncipherlast	$out7,$out7,$in6
++	 le?vperm	$in4,$in4,$in4,$inpperm
++	 lvx_u		$in6,$x60,$inp
++	vmr		$ivec,$in7
++	 le?vperm	$in5,$in5,$in5,$inpperm
++	 lvx_u		$in7,$x70,$inp
++	 addi		$inp,$inp,0x80
++
++	le?vperm	$out0,$out0,$out0,$inpperm
++	le?vperm	$out1,$out1,$out1,$inpperm
++	stvx_u		$out0,$x00,$out
++	 le?vperm	$in6,$in6,$in6,$inpperm
++	 vxor		$out0,$in0,$rndkey0
++	le?vperm	$out2,$out2,$out2,$inpperm
++	stvx_u		$out1,$x10,$out
++	 le?vperm	$in7,$in7,$in7,$inpperm
++	 vxor		$out1,$in1,$rndkey0
++	le?vperm	$out3,$out3,$out3,$inpperm
++	stvx_u		$out2,$x20,$out
++	 vxor		$out2,$in2,$rndkey0
++	le?vperm	$out4,$out4,$out4,$inpperm
++	stvx_u		$out3,$x30,$out
++	 vxor		$out3,$in3,$rndkey0
++	le?vperm	$out5,$out5,$out5,$inpperm
++	stvx_u		$out4,$x40,$out
++	 vxor		$out4,$in4,$rndkey0
++	le?vperm	$out6,$out6,$out6,$inpperm
++	stvx_u		$out5,$x50,$out
++	 vxor		$out5,$in5,$rndkey0
++	le?vperm	$out7,$out7,$out7,$inpperm
++	stvx_u		$out6,$x60,$out
++	 vxor		$out6,$in6,$rndkey0
++	stvx_u		$out7,$x70,$out
++	addi		$out,$out,0x80
++	 vxor		$out7,$in7,$rndkey0
++
++	mtctr		$rounds
++	beq		Loop_cbc_dec8x		# did $len-=128 borrow?
++
++	addic.		$len,$len,128
++	beq		Lcbc_dec8x_done
++	nop
++	nop
++
++Loop_cbc_dec8x_tail:				# up to 7 "words" tail...
++	vncipher	$out1,$out1,v24
++	vncipher	$out2,$out2,v24
++	vncipher	$out3,$out3,v24
++	vncipher	$out4,$out4,v24
++	vncipher	$out5,$out5,v24
++	vncipher	$out6,$out6,v24
++	vncipher	$out7,$out7,v24
++	lvx		v24,$x20,$key_		# round[3]
++	addi		$key_,$key_,0x20
++
++	vncipher	$out1,$out1,v25
++	vncipher	$out2,$out2,v25
++	vncipher	$out3,$out3,v25
++	vncipher	$out4,$out4,v25
++	vncipher	$out5,$out5,v25
++	vncipher	$out6,$out6,v25
++	vncipher	$out7,$out7,v25
++	lvx		v25,$x10,$key_		# round[4]
++	bdnz		Loop_cbc_dec8x_tail
++
++	vncipher	$out1,$out1,v24
++	vncipher	$out2,$out2,v24
++	vncipher	$out3,$out3,v24
++	vncipher	$out4,$out4,v24
++	vncipher	$out5,$out5,v24
++	vncipher	$out6,$out6,v24
++	vncipher	$out7,$out7,v24
++
++	vncipher	$out1,$out1,v25
++	vncipher	$out2,$out2,v25
++	vncipher	$out3,$out3,v25
++	vncipher	$out4,$out4,v25
++	vncipher	$out5,$out5,v25
++	vncipher	$out6,$out6,v25
++	vncipher	$out7,$out7,v25
++
++	vncipher	$out1,$out1,v26
++	vncipher	$out2,$out2,v26
++	vncipher	$out3,$out3,v26
++	vncipher	$out4,$out4,v26
++	vncipher	$out5,$out5,v26
++	vncipher	$out6,$out6,v26
++	vncipher	$out7,$out7,v26
++
++	vncipher	$out1,$out1,v27
++	vncipher	$out2,$out2,v27
++	vncipher	$out3,$out3,v27
++	vncipher	$out4,$out4,v27
++	vncipher	$out5,$out5,v27
++	vncipher	$out6,$out6,v27
++	vncipher	$out7,$out7,v27
++
++	vncipher	$out1,$out1,v28
++	vncipher	$out2,$out2,v28
++	vncipher	$out3,$out3,v28
++	vncipher	$out4,$out4,v28
++	vncipher	$out5,$out5,v28
++	vncipher	$out6,$out6,v28
++	vncipher	$out7,$out7,v28
++
++	vncipher	$out1,$out1,v29
++	vncipher	$out2,$out2,v29
++	vncipher	$out3,$out3,v29
++	vncipher	$out4,$out4,v29
++	vncipher	$out5,$out5,v29
++	vncipher	$out6,$out6,v29
++	vncipher	$out7,$out7,v29
++
++	vncipher	$out1,$out1,v30
++	 vxor		$ivec,$ivec,v31		# last round key
++	vncipher	$out2,$out2,v30
++	 vxor		$in1,$in1,v31
++	vncipher	$out3,$out3,v30
++	 vxor		$in2,$in2,v31
++	vncipher	$out4,$out4,v30
++	 vxor		$in3,$in3,v31
++	vncipher	$out5,$out5,v30
++	 vxor		$in4,$in4,v31
++	vncipher	$out6,$out6,v30
++	 vxor		$in5,$in5,v31
++	vncipher	$out7,$out7,v30
++	 vxor		$in6,$in6,v31
++
++	cmplwi		$len,32			# switch($len)
++	blt		Lcbc_dec8x_one
++	nop
++	beq		Lcbc_dec8x_two
++	cmplwi		$len,64
++	blt		Lcbc_dec8x_three
++	nop
++	beq		Lcbc_dec8x_four
++	cmplwi		$len,96
++	blt		Lcbc_dec8x_five
++	nop
++	beq		Lcbc_dec8x_six
++
++Lcbc_dec8x_seven:
++	vncipherlast	$out1,$out1,$ivec
++	vncipherlast	$out2,$out2,$in1
++	vncipherlast	$out3,$out3,$in2
++	vncipherlast	$out4,$out4,$in3
++	vncipherlast	$out5,$out5,$in4
++	vncipherlast	$out6,$out6,$in5
++	vncipherlast	$out7,$out7,$in6
++	vmr		$ivec,$in7
++
++	le?vperm	$out1,$out1,$out1,$inpperm
++	le?vperm	$out2,$out2,$out2,$inpperm
++	stvx_u		$out1,$x00,$out
++	le?vperm	$out3,$out3,$out3,$inpperm
++	stvx_u		$out2,$x10,$out
++	le?vperm	$out4,$out4,$out4,$inpperm
++	stvx_u		$out3,$x20,$out
++	le?vperm	$out5,$out5,$out5,$inpperm
++	stvx_u		$out4,$x30,$out
++	le?vperm	$out6,$out6,$out6,$inpperm
++	stvx_u		$out5,$x40,$out
++	le?vperm	$out7,$out7,$out7,$inpperm
++	stvx_u		$out6,$x50,$out
++	stvx_u		$out7,$x60,$out
++	addi		$out,$out,0x70
++	b		Lcbc_dec8x_done
++
++.align	5
++Lcbc_dec8x_six:
++	vncipherlast	$out2,$out2,$ivec
++	vncipherlast	$out3,$out3,$in2
++	vncipherlast	$out4,$out4,$in3
++	vncipherlast	$out5,$out5,$in4
++	vncipherlast	$out6,$out6,$in5
++	vncipherlast	$out7,$out7,$in6
++	vmr		$ivec,$in7
++
++	le?vperm	$out2,$out2,$out2,$inpperm
++	le?vperm	$out3,$out3,$out3,$inpperm
++	stvx_u		$out2,$x00,$out
++	le?vperm	$out4,$out4,$out4,$inpperm
++	stvx_u		$out3,$x10,$out
++	le?vperm	$out5,$out5,$out5,$inpperm
++	stvx_u		$out4,$x20,$out
++	le?vperm	$out6,$out6,$out6,$inpperm
++	stvx_u		$out5,$x30,$out
++	le?vperm	$out7,$out7,$out7,$inpperm
++	stvx_u		$out6,$x40,$out
++	stvx_u		$out7,$x50,$out
++	addi		$out,$out,0x60
++	b		Lcbc_dec8x_done
++
++.align	5
++Lcbc_dec8x_five:
++	vncipherlast	$out3,$out3,$ivec
++	vncipherlast	$out4,$out4,$in3
++	vncipherlast	$out5,$out5,$in4
++	vncipherlast	$out6,$out6,$in5
++	vncipherlast	$out7,$out7,$in6
++	vmr		$ivec,$in7
++
++	le?vperm	$out3,$out3,$out3,$inpperm
++	le?vperm	$out4,$out4,$out4,$inpperm
++	stvx_u		$out3,$x00,$out
++	le?vperm	$out5,$out5,$out5,$inpperm
++	stvx_u		$out4,$x10,$out
++	le?vperm	$out6,$out6,$out6,$inpperm
++	stvx_u		$out5,$x20,$out
++	le?vperm	$out7,$out7,$out7,$inpperm
++	stvx_u		$out6,$x30,$out
++	stvx_u		$out7,$x40,$out
++	addi		$out,$out,0x50
++	b		Lcbc_dec8x_done
++
++.align	5
++Lcbc_dec8x_four:
++	vncipherlast	$out4,$out4,$ivec
++	vncipherlast	$out5,$out5,$in4
++	vncipherlast	$out6,$out6,$in5
++	vncipherlast	$out7,$out7,$in6
++	vmr		$ivec,$in7
++
++	le?vperm	$out4,$out4,$out4,$inpperm
++	le?vperm	$out5,$out5,$out5,$inpperm
++	stvx_u		$out4,$x00,$out
++	le?vperm	$out6,$out6,$out6,$inpperm
++	stvx_u		$out5,$x10,$out
++	le?vperm	$out7,$out7,$out7,$inpperm
++	stvx_u		$out6,$x20,$out
++	stvx_u		$out7,$x30,$out
++	addi		$out,$out,0x40
++	b		Lcbc_dec8x_done
++
++.align	5
++Lcbc_dec8x_three:
++	vncipherlast	$out5,$out5,$ivec
++	vncipherlast	$out6,$out6,$in5
++	vncipherlast	$out7,$out7,$in6
++	vmr		$ivec,$in7
++
++	le?vperm	$out5,$out5,$out5,$inpperm
++	le?vperm	$out6,$out6,$out6,$inpperm
++	stvx_u		$out5,$x00,$out
++	le?vperm	$out7,$out7,$out7,$inpperm
++	stvx_u		$out6,$x10,$out
++	stvx_u		$out7,$x20,$out
++	addi		$out,$out,0x30
++	b		Lcbc_dec8x_done
++
++.align	5
++Lcbc_dec8x_two:
++	vncipherlast	$out6,$out6,$ivec
++	vncipherlast	$out7,$out7,$in6
++	vmr		$ivec,$in7
++
++	le?vperm	$out6,$out6,$out6,$inpperm
++	le?vperm	$out7,$out7,$out7,$inpperm
++	stvx_u		$out6,$x00,$out
++	stvx_u		$out7,$x10,$out
++	addi		$out,$out,0x20
++	b		Lcbc_dec8x_done
++
++.align	5
++Lcbc_dec8x_one:
++	vncipherlast	$out7,$out7,$ivec
++	vmr		$ivec,$in7
++
++	le?vperm	$out7,$out7,$out7,$inpperm
++	stvx_u		$out7,0,$out
++	addi		$out,$out,0x10
++
++Lcbc_dec8x_done:
++	le?vperm	$ivec,$ivec,$ivec,$inpperm
++	stvx_u		$ivec,0,$ivp		# write [unaligned] iv
++
++	li		r10,`$FRAME+15`
++	li		r11,`$FRAME+31`
++	stvx		$inpperm,r10,$sp	# wipe copies of round keys
++	addi		r10,r10,32
++	stvx		$inpperm,r11,$sp
++	addi		r11,r11,32
++	stvx		$inpperm,r10,$sp
++	addi		r10,r10,32
++	stvx		$inpperm,r11,$sp
++	addi		r11,r11,32
++	stvx		$inpperm,r10,$sp
++	addi		r10,r10,32
++	stvx		$inpperm,r11,$sp
++	addi		r11,r11,32
++	stvx		$inpperm,r10,$sp
++	addi		r10,r10,32
++	stvx		$inpperm,r11,$sp
++	addi		r11,r11,32
++
++	mtspr		256,$vrsave
++	lvx		v20,r10,$sp		# ABI says so
++	addi		r10,r10,32
++	lvx		v21,r11,$sp
++	addi		r11,r11,32
++	lvx		v22,r10,$sp
++	addi		r10,r10,32
++	lvx		v23,r11,$sp
++	addi		r11,r11,32
++	lvx		v24,r10,$sp
++	addi		r10,r10,32
++	lvx		v25,r11,$sp
++	addi		r11,r11,32
++	lvx		v26,r10,$sp
++	addi		r10,r10,32
++	lvx		v27,r11,$sp
++	addi		r11,r11,32
++	lvx		v28,r10,$sp
++	addi		r10,r10,32
++	lvx		v29,r11,$sp
++	addi		r11,r11,32
++	lvx		v30,r10,$sp
++	lvx		v31,r11,$sp
++	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
++	blr
++	.long		0
++	.byte		0,12,0x14,0,0x80,6,6,0
++	.long		0
++.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
++___
++}}	}}}
++
++#########################################################################
++{{{	# CTR procedure[s]						#
++my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
++my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
++my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
++						map("v$_",(4..11));
++my $dat=$tmp;
++
++$code.=<<___;
++.globl	.${prefix}_ctr32_encrypt_blocks
++.align	5
++.${prefix}_ctr32_encrypt_blocks:
++	${UCMP}i	$len,1
++	bltlr-
++
++	lis		r0,0xfff0
++	mfspr		$vrsave,256
++	mtspr		256,r0
++
++	li		$idx,15
++	vxor		$rndkey0,$rndkey0,$rndkey0
++	le?vspltisb	$tmp,0x0f
++
++	lvx		$ivec,0,$ivp		# load [unaligned] iv
++	lvsl		$inpperm,0,$ivp
++	lvx		$inptail,$idx,$ivp
++	 vspltisb	$one,1
++	le?vxor		$inpperm,$inpperm,$tmp
++	vperm		$ivec,$ivec,$inptail,$inpperm
++	 vsldoi		$one,$rndkey0,$one,1
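++	# $one is now {0,0,0,1}, so vadduwm with it bumps only the last
++	# 32-bit word, i.e. the big-endian counter lane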
++
++	neg		r11,$inp
++	?lvsl		$keyperm,0,$key		# prepare for unaligned key
++	lwz		$rounds,240($key)
++
++	lvsr		$inpperm,0,r11		# prepare for unaligned load
++	lvx		$inptail,0,$inp
++	addi		$inp,$inp,15		# 15 is not a typo
++	le?vxor		$inpperm,$inpperm,$tmp
++
++	srwi		$rounds,$rounds,1
++	li		$idx,16
++	subi		$rounds,$rounds,1
++
++	${UCMP}i	$len,8
++	bge		_aesp8_ctr32_encrypt8x
++
++	?lvsr		$outperm,0,$out		# prepare for unaligned store
++	vspltisb	$outmask,-1
++	lvx		$outhead,0,$out
++	?vperm		$outmask,$rndkey0,$outmask,$outperm
++	le?vxor		$outperm,$outperm,$tmp
++
++	lvx		$rndkey0,0,$key
++	mtctr		$rounds
++	lvx		$rndkey1,$idx,$key
++	addi		$idx,$idx,16
++	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
++	vxor		$inout,$ivec,$rndkey0
++	lvx		$rndkey0,$idx,$key
++	addi		$idx,$idx,16
++	b		Loop_ctr32_enc
++
++.align	5
++Loop_ctr32_enc:
++	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
++	vcipher		$inout,$inout,$rndkey1
++	lvx		$rndkey1,$idx,$key
++	addi		$idx,$idx,16
++	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
++	vcipher		$inout,$inout,$rndkey0
++	lvx		$rndkey0,$idx,$key
++	addi		$idx,$idx,16
++	bdnz		Loop_ctr32_enc
++
++	vadduwm		$ivec,$ivec,$one
++	 vmr		$dat,$inptail
++	 lvx		$inptail,0,$inp
++	 addi		$inp,$inp,16
++	 subic.		$len,$len,1		# blocks--
++
++	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
++	vcipher		$inout,$inout,$rndkey1
++	lvx		$rndkey1,$idx,$key
++	 vperm		$dat,$dat,$inptail,$inpperm
++	 li		$idx,16
++	?vperm		$rndkey1,$rndkey0,$rndkey1,$keyperm
++	 lvx		$rndkey0,0,$key
++	vxor		$dat,$dat,$rndkey1	# last round key
++	vcipherlast	$inout,$inout,$dat
++
++	 lvx		$rndkey1,$idx,$key
++	 addi		$idx,$idx,16
++	vperm		$inout,$inout,$inout,$outperm
++	vsel		$dat,$outhead,$inout,$outmask
++	 mtctr		$rounds
++	 ?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
++	vmr		$outhead,$inout
++	 vxor		$inout,$ivec,$rndkey0
++	 lvx		$rndkey0,$idx,$key
++	 addi		$idx,$idx,16
++	stvx		$dat,0,$out
++	addi		$out,$out,16
++	bne		Loop_ctr32_enc
++
++	addi		$out,$out,-1
++	lvx		$inout,0,$out		# redundant in aligned case
++	vsel		$inout,$outhead,$inout,$outmask
++	stvx		$inout,0,$out
++
++	mtspr		256,$vrsave
++	blr
++	.long		0
++	.byte		0,12,0x14,0,0,0,6,0
++	.long		0
++___
++#########################################################################
++{{	# Optimized CTR procedure					#
++my $key_="r11";
++my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
++my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
++my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
++my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
++			# v26-v31 last 6 round keys
++my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
++my ($two,$three,$four)=($outhead,$outperm,$outmask);
++
++$code.=<<___;
++.align	5
++_aesp8_ctr32_encrypt8x:
++	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
++	li		r10,`$FRAME+8*16+15`
++	li		r11,`$FRAME+8*16+31`
++	stvx		v20,r10,$sp		# ABI says so
++	addi		r10,r10,32
++	stvx		v21,r11,$sp
++	addi		r11,r11,32
++	stvx		v22,r10,$sp
++	addi		r10,r10,32
++	stvx		v23,r11,$sp
++	addi		r11,r11,32
++	stvx		v24,r10,$sp
++	addi		r10,r10,32
++	stvx		v25,r11,$sp
++	addi		r11,r11,32
++	stvx		v26,r10,$sp
++	addi		r10,r10,32
++	stvx		v27,r11,$sp
++	addi		r11,r11,32
++	stvx		v28,r10,$sp
++	addi		r10,r10,32
++	stvx		v29,r11,$sp
++	addi		r11,r11,32
++	stvx		v30,r10,$sp
++	stvx		v31,r11,$sp
++	li		r0,-1
++	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
++	li		$x10,0x10
++	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++	li		$x20,0x20
++	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++	li		$x30,0x30
++	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++	li		$x40,0x40
++	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++	li		$x50,0x50
++	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++	li		$x60,0x60
++	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++	li		$x70,0x70
++	mtspr		256,r0
++
++	subi		$rounds,$rounds,3	# -4 in total
++
++	lvx		$rndkey0,$x00,$key	# load key schedule
++	lvx		v30,$x10,$key
++	addi		$key,$key,0x20
++	lvx		v31,$x00,$key
++	?vperm		$rndkey0,$rndkey0,v30,$keyperm
++	addi		$key_,$sp,$FRAME+15
++	mtctr		$rounds
++
++Load_ctr32_enc_key:
++	?vperm		v24,v30,v31,$keyperm
++	lvx		v30,$x10,$key
++	addi		$key,$key,0x20
++	stvx		v24,$x00,$key_		# off-load round[1]
++	?vperm		v25,v31,v30,$keyperm
++	lvx		v31,$x00,$key
++	stvx		v25,$x10,$key_		# off-load round[2]
++	addi		$key_,$key_,0x20
++	bdnz		Load_ctr32_enc_key
++
++	lvx		v26,$x10,$key
++	?vperm		v24,v30,v31,$keyperm
++	lvx		v27,$x20,$key
++	stvx		v24,$x00,$key_		# off-load round[3]
++	?vperm		v25,v31,v26,$keyperm
++	lvx		v28,$x30,$key
++	stvx		v25,$x10,$key_		# off-load round[4]
++	addi		$key_,$sp,$FRAME+15	# rewind $key_
++	?vperm		v26,v26,v27,$keyperm
++	lvx		v29,$x40,$key
++	?vperm		v27,v27,v28,$keyperm
++	lvx		v30,$x50,$key
++	?vperm		v28,v28,v29,$keyperm
++	lvx		v31,$x60,$key
++	?vperm		v29,v29,v30,$keyperm
++	lvx		$out0,$x70,$key		# borrow $out0
++	?vperm		v30,v30,v31,$keyperm
++	lvx		v24,$x00,$key_		# pre-load round[1]
++	?vperm		v31,v31,$out0,$keyperm
++	lvx		v25,$x10,$key_		# pre-load round[2]
++
++	vadduwm		$two,$one,$one
++	subi		$inp,$inp,15		# undo "caller"
++	$SHL		$len,$len,4
++
++	vadduwm		$out1,$ivec,$one	# counter values ...
++	vadduwm		$out2,$ivec,$two
++	vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
++	 le?li		$idx,8
++	vadduwm		$out3,$out1,$two
++	vxor		$out1,$out1,$rndkey0
++	 le?lvsl	$inpperm,0,$idx
++	vadduwm		$out4,$out2,$two
++	vxor		$out2,$out2,$rndkey0
++	 le?vspltisb	$tmp,0x0f
++	vadduwm		$out5,$out3,$two
++	vxor		$out3,$out3,$rndkey0
++	 le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
++	vadduwm		$out6,$out4,$two
++	vxor		$out4,$out4,$rndkey0
++	vadduwm		$out7,$out5,$two
++	vxor		$out5,$out5,$rndkey0
++	vadduwm		$ivec,$out6,$two	# next counter value
++	vxor		$out6,$out6,$rndkey0
++	vxor		$out7,$out7,$rndkey0
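++	# $out0-$out7 now hold counter values $ivec+0 through +7, each
++	# pre-xored with round key 0, and $ivec itself has advanced by 8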
++
++	mtctr		$rounds
++	b		Loop_ctr32_enc8x
++.align	5
++Loop_ctr32_enc8x:
++	vcipher 	$out0,$out0,v24
++	vcipher 	$out1,$out1,v24
++	vcipher 	$out2,$out2,v24
++	vcipher 	$out3,$out3,v24
++	vcipher 	$out4,$out4,v24
++	vcipher 	$out5,$out5,v24
++	vcipher 	$out6,$out6,v24
++	vcipher 	$out7,$out7,v24
++Loop_ctr32_enc8x_middle:
++	lvx		v24,$x20,$key_		# round[3]
++	addi		$key_,$key_,0x20
++
++	vcipher 	$out0,$out0,v25
++	vcipher 	$out1,$out1,v25
++	vcipher 	$out2,$out2,v25
++	vcipher 	$out3,$out3,v25
++	vcipher 	$out4,$out4,v25
++	vcipher 	$out5,$out5,v25
++	vcipher 	$out6,$out6,v25
++	vcipher 	$out7,$out7,v25
++	lvx		v25,$x10,$key_		# round[4]
++	bdnz		Loop_ctr32_enc8x
++
++	subic		r11,$len,256		# $len-256, borrow $key_
++	vcipher 	$out0,$out0,v24
++	vcipher 	$out1,$out1,v24
++	vcipher 	$out2,$out2,v24
++	vcipher 	$out3,$out3,v24
++	vcipher 	$out4,$out4,v24
++	vcipher 	$out5,$out5,v24
++	vcipher 	$out6,$out6,v24
++	vcipher 	$out7,$out7,v24
++
++	subfe		r0,r0,r0		# borrow?-1:0
++	vcipher 	$out0,$out0,v25
++	vcipher 	$out1,$out1,v25
++	vcipher 	$out2,$out2,v25
++	vcipher 	$out3,$out3,v25
++	vcipher 	$out4,$out4,v25
++	vcipher		$out5,$out5,v25
++	vcipher		$out6,$out6,v25
++	vcipher		$out7,$out7,v25
++
++	and		r0,r0,r11
++	addi		$key_,$sp,$FRAME+15	# rewind $key_
++	vcipher		$out0,$out0,v26
++	vcipher		$out1,$out1,v26
++	vcipher		$out2,$out2,v26
++	vcipher		$out3,$out3,v26
++	vcipher		$out4,$out4,v26
++	vcipher		$out5,$out5,v26
++	vcipher		$out6,$out6,v26
++	vcipher		$out7,$out7,v26
++	lvx		v24,$x00,$key_		# re-pre-load round[1]
++
++	subic		$len,$len,129		# $len-=129
++	vcipher		$out0,$out0,v27
++	addi		$len,$len,1		# $len-=128 really
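++						# (129 rather than 128 so the
++						# borrow, and hence the break
++						# below, also triggers when
++						# exactly 128 bytes remain)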
++	vcipher		$out1,$out1,v27
++	vcipher		$out2,$out2,v27
++	vcipher		$out3,$out3,v27
++	vcipher		$out4,$out4,v27
++	vcipher		$out5,$out5,v27
++	vcipher		$out6,$out6,v27
++	vcipher		$out7,$out7,v27
++	lvx		v25,$x10,$key_		# re-pre-load round[2]
++
++	vcipher		$out0,$out0,v28
++	 lvx_u		$in0,$x00,$inp		# load input
++	vcipher		$out1,$out1,v28
++	 lvx_u		$in1,$x10,$inp
++	vcipher		$out2,$out2,v28
++	 lvx_u		$in2,$x20,$inp
++	vcipher		$out3,$out3,v28
++	 lvx_u		$in3,$x30,$inp
++	vcipher		$out4,$out4,v28
++	 lvx_u		$in4,$x40,$inp
++	vcipher		$out5,$out5,v28
++	 lvx_u		$in5,$x50,$inp
++	vcipher		$out6,$out6,v28
++	 lvx_u		$in6,$x60,$inp
++	vcipher		$out7,$out7,v28
++	 lvx_u		$in7,$x70,$inp
++	 addi		$inp,$inp,0x80
++
++	vcipher		$out0,$out0,v29
++	 le?vperm	$in0,$in0,$in0,$inpperm
++	vcipher		$out1,$out1,v29
++	 le?vperm	$in1,$in1,$in1,$inpperm
++	vcipher		$out2,$out2,v29
++	 le?vperm	$in2,$in2,$in2,$inpperm
++	vcipher		$out3,$out3,v29
++	 le?vperm	$in3,$in3,$in3,$inpperm
++	vcipher		$out4,$out4,v29
++	 le?vperm	$in4,$in4,$in4,$inpperm
++	vcipher		$out5,$out5,v29
++	 le?vperm	$in5,$in5,$in5,$inpperm
++	vcipher		$out6,$out6,v29
++	 le?vperm	$in6,$in6,$in6,$inpperm
++	vcipher		$out7,$out7,v29
++	 le?vperm	$in7,$in7,$in7,$inpperm
++
++	add		$inp,$inp,r0		# $inp is adjusted in such a
++						# way that at exit from the
++						# loop inX-in7 are loaded
++						# with last "words"
++	subfe.		r0,r0,r0		# borrow?-1:0
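++						# (borrow mask again: r0 is 0
++						# only while input extends
++						# beyond the current batch)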
++	vcipher		$out0,$out0,v30
++	 vxor		$in0,$in0,v31		# xor with last round key
++	vcipher		$out1,$out1,v30
++	 vxor		$in1,$in1,v31
++	vcipher		$out2,$out2,v30
++	 vxor		$in2,$in2,v31
++	vcipher		$out3,$out3,v30
++	 vxor		$in3,$in3,v31
++	vcipher		$out4,$out4,v30
++	 vxor		$in4,$in4,v31
++	vcipher		$out5,$out5,v30
++	 vxor		$in5,$in5,v31
++	vcipher		$out6,$out6,v30
++	 vxor		$in6,$in6,v31
++	vcipher		$out7,$out7,v30
++	 vxor		$in7,$in7,v31
++
++	bne		Lctr32_enc8x_break	# did $len-129 borrow?
++
++	vcipherlast	$in0,$out0,$in0
++	vcipherlast	$in1,$out1,$in1
++	 vadduwm	$out1,$ivec,$one	# counter values ...
++	vcipherlast	$in2,$out2,$in2
++	 vadduwm	$out2,$ivec,$two
++	 vxor		$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
++	vcipherlast	$in3,$out3,$in3
++	 vadduwm	$out3,$out1,$two
++	 vxor		$out1,$out1,$rndkey0
++	vcipherlast	$in4,$out4,$in4
++	 vadduwm	$out4,$out2,$two
++	 vxor		$out2,$out2,$rndkey0
++	vcipherlast	$in5,$out5,$in5
++	 vadduwm	$out5,$out3,$two
++	 vxor		$out3,$out3,$rndkey0
++	vcipherlast	$in6,$out6,$in6
++	 vadduwm	$out6,$out4,$two
++	 vxor		$out4,$out4,$rndkey0
++	vcipherlast	$in7,$out7,$in7
++	 vadduwm	$out7,$out5,$two
++	 vxor		$out5,$out5,$rndkey0
++	le?vperm	$in0,$in0,$in0,$inpperm
++	 vadduwm	$ivec,$out6,$two	# next counter value
++	 vxor		$out6,$out6,$rndkey0
++	le?vperm	$in1,$in1,$in1,$inpperm
++	 vxor		$out7,$out7,$rndkey0
++	mtctr		$rounds
++
++	 vcipher	$out0,$out0,v24
++	stvx_u		$in0,$x00,$out
++	le?vperm	$in2,$in2,$in2,$inpperm
++	 vcipher	$out1,$out1,v24
++	stvx_u		$in1,$x10,$out
++	le?vperm	$in3,$in3,$in3,$inpperm
++	 vcipher	$out2,$out2,v24
++	stvx_u		$in2,$x20,$out
++	le?vperm	$in4,$in4,$in4,$inpperm
++	 vcipher	$out3,$out3,v24
++	stvx_u		$in3,$x30,$out
++	le?vperm	$in5,$in5,$in5,$inpperm
++	 vcipher	$out4,$out4,v24
++	stvx_u		$in4,$x40,$out
++	le?vperm	$in6,$in6,$in6,$inpperm
++	 vcipher	$out5,$out5,v24
++	stvx_u		$in5,$x50,$out
++	le?vperm	$in7,$in7,$in7,$inpperm
++	 vcipher	$out6,$out6,v24
++	stvx_u		$in6,$x60,$out
++	 vcipher	$out7,$out7,v24
++	stvx_u		$in7,$x70,$out
++	addi		$out,$out,0x80
++
++	b		Loop_ctr32_enc8x_middle
++
++.align	5
++Lctr32_enc8x_break:
++	cmpwi		$len,-0x60
++	blt		Lctr32_enc8x_one
++	nop
++	beq		Lctr32_enc8x_two
++	cmpwi		$len,-0x40
++	blt		Lctr32_enc8x_three
++	nop
++	beq		Lctr32_enc8x_four
++	cmpwi		$len,-0x20
++	blt		Lctr32_enc8x_five
++	nop
++	beq		Lctr32_enc8x_six
++	cmpwi		$len,0x00
++	blt		Lctr32_enc8x_seven
++
++Lctr32_enc8x_eight:
++	vcipherlast	$out0,$out0,$in0
++	vcipherlast	$out1,$out1,$in1
++	vcipherlast	$out2,$out2,$in2
++	vcipherlast	$out3,$out3,$in3
++	vcipherlast	$out4,$out4,$in4
++	vcipherlast	$out5,$out5,$in5
++	vcipherlast	$out6,$out6,$in6
++	vcipherlast	$out7,$out7,$in7
++
++	le?vperm	$out0,$out0,$out0,$inpperm
++	le?vperm	$out1,$out1,$out1,$inpperm
++	stvx_u		$out0,$x00,$out
++	le?vperm	$out2,$out2,$out2,$inpperm
++	stvx_u		$out1,$x10,$out
++	le?vperm	$out3,$out3,$out3,$inpperm
++	stvx_u		$out2,$x20,$out
++	le?vperm	$out4,$out4,$out4,$inpperm
++	stvx_u		$out3,$x30,$out
++	le?vperm	$out5,$out5,$out5,$inpperm
++	stvx_u		$out4,$x40,$out
++	le?vperm	$out6,$out6,$out6,$inpperm
++	stvx_u		$out5,$x50,$out
++	le?vperm	$out7,$out7,$out7,$inpperm
++	stvx_u		$out6,$x60,$out
++	stvx_u		$out7,$x70,$out
++	addi		$out,$out,0x80
++	b		Lctr32_enc8x_done
++
++.align	5
++Lctr32_enc8x_seven:
++	vcipherlast	$out0,$out0,$in1
++	vcipherlast	$out1,$out1,$in2
++	vcipherlast	$out2,$out2,$in3
++	vcipherlast	$out3,$out3,$in4
++	vcipherlast	$out4,$out4,$in5
++	vcipherlast	$out5,$out5,$in6
++	vcipherlast	$out6,$out6,$in7
++
++	le?vperm	$out0,$out0,$out0,$inpperm
++	le?vperm	$out1,$out1,$out1,$inpperm
++	stvx_u		$out0,$x00,$out
++	le?vperm	$out2,$out2,$out2,$inpperm
++	stvx_u		$out1,$x10,$out
++	le?vperm	$out3,$out3,$out3,$inpperm
++	stvx_u		$out2,$x20,$out
++	le?vperm	$out4,$out4,$out4,$inpperm
++	stvx_u		$out3,$x30,$out
++	le?vperm	$out5,$out5,$out5,$inpperm
++	stvx_u		$out4,$x40,$out
++	le?vperm	$out6,$out6,$out6,$inpperm
++	stvx_u		$out5,$x50,$out
++	stvx_u		$out6,$x60,$out
++	addi		$out,$out,0x70
++	b		Lctr32_enc8x_done
++
++.align	5
++Lctr32_enc8x_six:
++	vcipherlast	$out0,$out0,$in2
++	vcipherlast	$out1,$out1,$in3
++	vcipherlast	$out2,$out2,$in4
++	vcipherlast	$out3,$out3,$in5
++	vcipherlast	$out4,$out4,$in6
++	vcipherlast	$out5,$out5,$in7
++
++	le?vperm	$out0,$out0,$out0,$inpperm
++	le?vperm	$out1,$out1,$out1,$inpperm
++	stvx_u		$out0,$x00,$out
++	le?vperm	$out2,$out2,$out2,$inpperm
++	stvx_u		$out1,$x10,$out
++	le?vperm	$out3,$out3,$out3,$inpperm
++	stvx_u		$out2,$x20,$out
++	le?vperm	$out4,$out4,$out4,$inpperm
++	stvx_u		$out3,$x30,$out
++	le?vperm	$out5,$out5,$out5,$inpperm
++	stvx_u		$out4,$x40,$out
++	stvx_u		$out5,$x50,$out
++	addi		$out,$out,0x60
++	b		Lctr32_enc8x_done
++
++.align	5
++Lctr32_enc8x_five:
++	vcipherlast	$out0,$out0,$in3
++	vcipherlast	$out1,$out1,$in4
++	vcipherlast	$out2,$out2,$in5
++	vcipherlast	$out3,$out3,$in6
++	vcipherlast	$out4,$out4,$in7
++
++	le?vperm	$out0,$out0,$out0,$inpperm
++	le?vperm	$out1,$out1,$out1,$inpperm
++	stvx_u		$out0,$x00,$out
++	le?vperm	$out2,$out2,$out2,$inpperm
++	stvx_u		$out1,$x10,$out
++	le?vperm	$out3,$out3,$out3,$inpperm
++	stvx_u		$out2,$x20,$out
++	le?vperm	$out4,$out4,$out4,$inpperm
++	stvx_u		$out3,$x30,$out
++	stvx_u		$out4,$x40,$out
++	addi		$out,$out,0x50
++	b		Lctr32_enc8x_done
++
++.align	5
++Lctr32_enc8x_four:
++	vcipherlast	$out0,$out0,$in4
++	vcipherlast	$out1,$out1,$in5
++	vcipherlast	$out2,$out2,$in6
++	vcipherlast	$out3,$out3,$in7
++
++	le?vperm	$out0,$out0,$out0,$inpperm
++	le?vperm	$out1,$out1,$out1,$inpperm
++	stvx_u		$out0,$x00,$out
++	le?vperm	$out2,$out2,$out2,$inpperm
++	stvx_u		$out1,$x10,$out
++	le?vperm	$out3,$out3,$out3,$inpperm
++	stvx_u		$out2,$x20,$out
++	stvx_u		$out3,$x30,$out
++	addi		$out,$out,0x40
++	b		Lctr32_enc8x_done
++
++.align	5
++Lctr32_enc8x_three:
++	vcipherlast	$out0,$out0,$in5
++	vcipherlast	$out1,$out1,$in6
++	vcipherlast	$out2,$out2,$in7
++
++	le?vperm	$out0,$out0,$out0,$inpperm
++	le?vperm	$out1,$out1,$out1,$inpperm
++	stvx_u		$out0,$x00,$out
++	le?vperm	$out2,$out2,$out2,$inpperm
++	stvx_u		$out1,$x10,$out
++	stvx_u		$out2,$x20,$out
++	addi		$out,$out,0x30
++	b		Lctr32_enc8x_done
++
++.align	5
++Lctr32_enc8x_two:
++	vcipherlast	$out0,$out0,$in6
++	vcipherlast	$out1,$out1,$in7
++
++	le?vperm	$out0,$out0,$out0,$inpperm
++	le?vperm	$out1,$out1,$out1,$inpperm
++	stvx_u		$out0,$x00,$out
++	stvx_u		$out1,$x10,$out
++	addi		$out,$out,0x20
++	b		Lctr32_enc8x_done
++
++.align	5
++Lctr32_enc8x_one:
++	vcipherlast	$out0,$out0,$in7
++
++	le?vperm	$out0,$out0,$out0,$inpperm
++	stvx_u		$out0,0,$out
++	addi		$out,$out,0x10
++
++Lctr32_enc8x_done:
++	li		r10,`$FRAME+15`
++	li		r11,`$FRAME+31`
++	stvx		$inpperm,r10,$sp	# wipe copies of round keys
++	addi		r10,r10,32
++	stvx		$inpperm,r11,$sp
++	addi		r11,r11,32
++	stvx		$inpperm,r10,$sp
++	addi		r10,r10,32
++	stvx		$inpperm,r11,$sp
++	addi		r11,r11,32
++	stvx		$inpperm,r10,$sp
++	addi		r10,r10,32
++	stvx		$inpperm,r11,$sp
++	addi		r11,r11,32
++	stvx		$inpperm,r10,$sp
++	addi		r10,r10,32
++	stvx		$inpperm,r11,$sp
++	addi		r11,r11,32
++
++	mtspr		256,$vrsave
++	lvx		v20,r10,$sp		# ABI says so
++	addi		r10,r10,32
++	lvx		v21,r11,$sp
++	addi		r11,r11,32
++	lvx		v22,r10,$sp
++	addi		r10,r10,32
++	lvx		v23,r11,$sp
++	addi		r11,r11,32
++	lvx		v24,r10,$sp
++	addi		r10,r10,32
++	lvx		v25,r11,$sp
++	addi		r11,r11,32
++	lvx		v26,r10,$sp
++	addi		r10,r10,32
++	lvx		v27,r11,$sp
++	addi		r11,r11,32
++	lvx		v28,r10,$sp
++	addi		r10,r10,32
++	lvx		v29,r11,$sp
++	addi		r11,r11,32
++	lvx		v30,r10,$sp
++	lvx		v31,r11,$sp
++	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
++	blr
++	.long		0
++	.byte		0,12,0x14,0,0x80,6,6,0
++	.long		0
++.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
++___
++}}	}}}
++
++my $consts=1;
++foreach(split("\n",$code)) {
++	s/\`([^\`]*)\`/eval($1)/geo;
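++	# backticked spans are compile-time arithmetic (stack offsets such
++	# as $FRAME+21*16) and are eval'ed here before the line is emitted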
++
++	# constants table endian-specific conversion
++	if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
++	    my $conv=$3;
++	    my @bytes=();
++
++	    # convert to endian-agnostic format
++	    if ($1 eq "long") {
++	      foreach (split(/,\s*/,$2)) {
++		my $l = /^0/?oct:int;
++		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
++	      }
++	    } else {
++		@bytes = map(/^0/?oct:int,split(/,\s*/,$2));
++	    }
++
++	    # little-endian conversion
++	    if ($flavour =~ /le$/o) {
++		SWITCH: for($conv)  {
++		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
++		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; }; 
++		}
++	    }
++
++	    #emit
++	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
++	    next;
++	}
++	$consts=0 if (m/Lconsts:/o);	# end of table
++
++	# instructions prefixed with '?' are endian-specific and need
++	# to be adjusted accordingly...
++	if ($flavour =~ /le$/o) {	# little-endian
++	    s/le\?//o		or
++	    s/be\?/#be#/o	or
++	    s/\?lvsr/lvsl/o	or
++	    s/\?lvsl/lvsr/o	or
++	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
++	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
++	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
++	} else {			# big-endian
++	    s/le\?/#le#/o	or
++	    s/be\?//o		or
++	    s/\?([a-z]+)/$1/o;
++	}
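++	# (on LE, lvsl/lvsr swap roles, the two vperm sources swap, and
++	# vsldoi shift counts become 16-n, mirroring the byte-reversed lanes)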
++
++	print $_,"\n";
++}
++
++close STDOUT;
+diff --git a/crypto/aes/asm/vpaes-ppc.pl b/crypto/aes/asm/vpaes-ppc.pl
+new file mode 100644
+index 0000000..7fda60e
+--- /dev/null
++++ b/crypto/aes/asm/vpaes-ppc.pl
+@@ -0,0 +1,1512 @@
++#!/usr/bin/env perl
++
++######################################################################
++## Constant-time SSSE3 AES core implementation.
++## version 0.1
++##
++## By Mike Hamburg (Stanford University), 2009
++## Public domain.
++##
++## For details see http://shiftleft.org/papers/vector_aes/ and
++## http://crypto.stanford.edu/vpaes/.
++
++# CBC encrypt/decrypt performance in cycles per byte processed with
++# a 128-bit key.
++#
++#		aes-ppc.pl		this
++# G4e		35.5/52.1/(23.8)	11.9(*)/15.4
++# POWER6	42.7/54.3/(28.2)	63.0/92.8(**)
++# POWER7	32.3/42.9/(18.4)	18.5/23.3
++#
++# (*)	This is ~10% worse than reported in the paper. The reason is
++#	twofold. First, this module makes no assumption about key
++#	schedule (or data, for that matter) alignment and handles it
++#	in-line. Second, being transliterated from vpaes-x86_64.pl, it
++#	relies on "nested inversion", which is better suited to Intel
++#	CPUs.
++# (**)	Inadequate POWER6 performance is due to astronomical AltiVec
++#	latency: 9 cycles per simple logical operation.
++
++$flavour = shift;
++
++if ($flavour =~ /64/) {
++	$SIZE_T	=8;
++	$LRSAVE	=2*$SIZE_T;
++	$STU	="stdu";
++	$POP	="ld";
++	$PUSH	="std";
++	$UCMP	="cmpld";
++} elsif ($flavour =~ /32/) {
++	$SIZE_T	=4;
++	$LRSAVE	=$SIZE_T;
++	$STU	="stwu";
++	$POP	="lwz";
++	$PUSH	="stw";
++	$UCMP	="cmplw";
++} else { die "nonsense $flavour"; }
++
++$sp="r1";
++$FRAME=6*$SIZE_T+13*16;	# 13*16 is for v20-v31 offload
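++			# (v20-v31 account for 12*16 bytes; the extra
++			# quadword is slack so the save area can be
++			# 16-byte-aligned at run time)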
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
++
++$code.=<<___;
++.machine	"any"
++
++.text
++
++.align	7	# totally strategic alignment
++_vpaes_consts:
++Lk_mc_forward:	# mc_forward
++	.long	0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c	?inv
++	.long	0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300	?inv
++	.long	0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704	?inv
++	.long	0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08	?inv
++Lk_mc_backward:	# mc_backward
++	.long	0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e	?inv
++	.long	0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a	?inv
++	.long	0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506	?inv
++	.long	0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102	?inv
++Lk_sr:		# sr
++	.long	0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f	?inv
++	.long	0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b	?inv
++	.long	0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07	?inv
++	.long	0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603	?inv
++
++##
++## "Hot" constants
++##
++Lk_inv:		# inv, inva
++	.long	0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704	?rev
++	.long	0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03	?rev
++Lk_ipt:		# input transform (lo, hi)
++	.long	0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca	?rev
++	.long	0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd	?rev
++Lk_sbo:		# sbou, sbot
++	.long	0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15	?rev
++	.long	0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e	?rev
++Lk_sb1:		# sb1u, sb1t
++	.long	0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b	?rev
++	.long	0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5	?rev
++Lk_sb2:		# sb2u, sb2t
++	.long	0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2	?rev
++	.long	0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e	?rev
++
++##
++##  Decryption stuff
++##
++Lk_dipt:	# decryption input transform
++	.long	0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15	?rev
++	.long	0x00650560, 0xe683e386, 0x94f191f4, 0x72177712	?rev
++Lk_dsbo:	# decryption sbox final output
++	.long	0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7	?rev
++	.long	0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca	?rev
++Lk_dsb9:	# decryption sbox output *9*u, *9*t
++	.long	0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca	?rev
++	.long	0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72	?rev
++Lk_dsbd:	# decryption sbox output *D*u, *D*t
++	.long	0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5	?rev
++	.long	0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129	?rev
++Lk_dsbb:	# decryption sbox output *B*u, *B*t
++	.long	0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660	?rev
++	.long	0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3	?rev
++Lk_dsbe:	# decryption sbox output *E*u, *E*t
++	.long	0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222	?rev
++	.long	0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794	?rev
++
++##
++##  Key schedule constants
++##
++Lk_dksd:	# decryption key schedule: invskew x*D
++	.long	0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007	?rev
++	.long	0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f	?rev
++Lk_dksb:	# decryption key schedule: invskew x*B
++	.long	0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603	?rev
++	.long	0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9	?rev
++Lk_dkse:	# decryption key schedule: invskew x*E + 0x63
++	.long	0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553	?rev
++	.long	0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd	?rev
++Lk_dks9:	# decryption key schedule: invskew x*9
++	.long	0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a	?rev
++	.long	0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b	?rev
++
++Lk_rcon:	# rcon
++	.long	0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70	?asis
++Lk_s63:
++	.long	0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b	?asis
++
++Lk_opt:		# output transform
++	.long	0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7	?rev
++	.long	0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1	?rev
++Lk_deskew:	# deskew tables: inverts the sbox's "skew"
++	.long	0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d	?rev
++	.long	0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128	?rev
++.align	5
++Lconsts:
++	mflr	r0
++	bcl	20,31,\$+4
++	mflr	r12	# r12 = address of this mflr (the bcl target)
++	addi	r12,r12,-0x308	# 0x308 = distance back to _vpaes_consts
++	mtlr	r0
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++.asciz  "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)"
++.align	6
++___
++
++my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31));
++{
++my ($inp,$out,$key) = map("r$_",(3..5));
++
++my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15));
++my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19));
++my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23));
++
++$code.=<<___;
++##
++##  _vpaes_encrypt_preheat
++##
++##  Points r12 at _vpaes_consts (so you can -fPIC) and fills v7-v9
++##  plus the inversion and s-box table registers used below.
++##
++.align	4
++_vpaes_encrypt_preheat:
++	mflr	r8
++	bl	Lconsts
++	mtlr	r8
++	li	r11, 0xc0		# Lk_inv
++	li	r10, 0xd0
++	li	r9,  0xe0		# Lk_ipt
++	li	r8,  0xf0
++	vxor	v7, v7, v7		# 0x00..00
++	vspltisb	v8,4		# 0x04..04
++	vspltisb	v9,0x0f		# 0x0f..0f
++	lvx	$invlo, r12, r11
++	li	r11, 0x100
++	lvx	$invhi, r12, r10
++	li	r10, 0x110
++	lvx	$iptlo, r12, r9
++	li	r9,  0x120
++	lvx	$ipthi, r12, r8
++	li	r8,  0x130
++	lvx	$sbou, r12, r11
++	li	r11, 0x140
++	lvx	$sbot, r12, r10
++	li	r10, 0x150
++	lvx	$sb1u, r12, r9
++	lvx	$sb1t, r12, r8
++	lvx	$sb2u, r12, r11
++	lvx	$sb2t, r12, r10
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++
++##
++##  _vpaes_encrypt_core
++##
++##  AES-encrypt v0.
++##
++##  Inputs:
++##     v0 = input
++##     table registers as in _vpaes_encrypt_preheat
++##     $key = scheduled keys
++##
++##  Output in v0; clobbers v1-v6 and r8-r11.  The x86 register names
++##  in the comments below refer to the vpaes-x86_64.pl original this
++##  was transliterated from.
++##
++.align 5
++_vpaes_encrypt_core:
++	lwz	r8, 240($key)		# pull rounds
++	li	r9, 16
++	lvx	v5, 0, $key		# vmovdqu	(%r9),	%xmm5		# round0 key
++	li	r11, 0x10
++	lvx	v6, r9, $key
++	addi	r9, r9, 16
++	?vperm	v5, v5, v6, $keyperm	# align round key
++	addi	r10, r11, 0x40
++	vsrb	v1, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0
++	vperm	v0, $iptlo, $iptlo, v0	# vpshufb	%xmm1,	%xmm2,	%xmm1
++	vperm	v1, $ipthi, $ipthi, v1	# vpshufb	%xmm0,	%xmm3,	%xmm2
++	vxor	v0, v0, v5		# vpxor	%xmm5,	%xmm1,	%xmm0
++	vxor	v0, v0, v1		# vpxor	%xmm2,	%xmm0,	%xmm0
++	mtctr	r8
++	b	Lenc_entry
++
++.align 4
++Lenc_loop:
++	# middle of middle round
++	vperm	v4, $sb1t, v7, v2	# vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
++	lvx	v1, r12, r11		# vmovdqa	-0x40(%r11,%r10), %xmm1	# .Lk_mc_forward[]
++	addi	r11, r11, 16
++	vperm	v0, $sb1u, v7, v3	# vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
++	vxor	v4, v4, v5		# vpxor		%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
++	andi.	r11, r11, 0x30		# and		\$0x30, %r11	# ... mod 4
++	vperm	v5, $sb2t, v7, v2	# vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
++	vxor	v0, v0, v4		# vpxor		%xmm4,	%xmm0,	%xmm0	# 0 = A
++	vperm	v2, $sb2u, v7, v3	# vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
++	lvx	v4, r12, r10		# vmovdqa	(%r11,%r10), %xmm4	# .Lk_mc_backward[]
++	addi	r10, r11, 0x40
++	vperm	v3, v0, v7, v1		# vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
++	vxor	v2, v2, v5		# vpxor		%xmm5,	%xmm2,	%xmm2	# 2 = 2A
++	vperm	v0, v0, v7, v4		# vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
++	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
++	vperm	v4, v3, v7, v1		# vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
++	vxor	v0, v0, v3		# vpxor		%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
++	vxor	v0, v0, v4		# vpxor		%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
++
++Lenc_entry:
++	# top of round
++	vsrb	v1, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0	# 1 = i
++	vperm	v5, $invhi, $invhi, v0	# vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
++	vxor	v0, v0, v1		# vpxor		%xmm0,	%xmm1,	%xmm1	# 0 = j
++	vperm	v3, $invlo, $invlo, v1	# vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
++	vperm	v4, $invlo, $invlo, v0	# vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
++	vand	v0, v0, v9
++	vxor	v3, v3, v5		# vpxor		%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
++	vxor	v4, v4, v5		# vpxor		%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
++	vperm	v2, $invlo, v7, v3	# vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
++	vmr	v5, v6
++	lvx	v6, r9, $key		# vmovdqu	(%r9), %xmm5
++	vperm	v3, $invlo, v7, v4	# vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
++	addi	r9, r9, 16
++	vxor	v2, v2, v0		# vpxor		%xmm1,	%xmm2,	%xmm2  	# 2 = io
++	?vperm	v5, v5, v6, $keyperm	# align round key
++	vxor	v3, v3, v1		# vpxor		%xmm0,	%xmm3,	%xmm3	# 3 = jo
++	bdnz	Lenc_loop
++
++	# middle of last round
++	addi	r10, r11, 0x80
++					# vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou	.Lk_sbo
++					# vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
++	vperm	v4, $sbou, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
++	lvx	v1, r12, r10		# vmovdqa	0x40(%r11,%r10), %xmm1	# .Lk_sr[]
++	vperm	v0, $sbot, v7, v3	# vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sb1t
++	vxor	v4, v4, v5		# vpxor		%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
++	vxor	v0, v0, v4		# vpxor		%xmm4,	%xmm0,	%xmm0	# 0 = A
++	vperm	v0, v0, v7, v1		# vpshufb	%xmm1,	%xmm0,	%xmm0
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++
++.globl	.vpaes_encrypt
++.align	5
++.vpaes_encrypt:
++	$STU	$sp,-$FRAME($sp)
++	li	r10,`15+6*$SIZE_T`
++	li	r11,`31+6*$SIZE_T`
++	mflr	r6
++	mfspr	r7, 256			# save vrsave
++	stvx	v20,r10,$sp
++	addi	r10,r10,32
++	stvx	v21,r11,$sp
++	addi	r11,r11,32
++	stvx	v22,r10,$sp
++	addi	r10,r10,32
++	stvx	v23,r11,$sp
++	addi	r11,r11,32
++	stvx	v24,r10,$sp
++	addi	r10,r10,32
++	stvx	v25,r11,$sp
++	addi	r11,r11,32
++	stvx	v26,r10,$sp
++	addi	r10,r10,32
++	stvx	v27,r11,$sp
++	addi	r11,r11,32
++	stvx	v28,r10,$sp
++	addi	r10,r10,32
++	stvx	v29,r11,$sp
++	addi	r11,r11,32
++	stvx	v30,r10,$sp
++	stvx	v31,r11,$sp
++	stw	r7,`$FRAME-4`($sp)	# save vrsave
++	li	r0, -1
++	$PUSH	r6,`$FRAME+$LRSAVE`($sp)
++	mtspr	256, r0			# preserve all AltiVec registers
++
++	bl	_vpaes_encrypt_preheat
++
++	?lvsl	$inpperm, 0, $inp	# prepare for unaligned access
++	lvx	v0, 0, $inp
++	addi	$inp, $inp, 15		# 15 is not a typo
++	 ?lvsr	$outperm, 0, $out
++	?lvsl	$keyperm, 0, $key	# prepare for unaligned access
++	 vnor	$outmask, v7, v7	# 0xff..ff
++	lvx	$inptail, 0, $inp	# redundant in aligned case
++	 ?vperm	$outmask, v7, $outmask, $outperm
++	 lvx	$outhead, 0, $out
++	?vperm	v0, v0, $inptail, $inpperm
++
++	bl	_vpaes_encrypt_core
++
++	vperm	v0, v0, v0, $outperm	# rotate right/left
++	vsel	v1, $outhead, v0, $outmask
++	vmr	$outhead, v0
++	stvx	v1, 0, $out
++	addi	$out, $out, 15		# 15 is not a typo
++	########
++
++	lvx	v1, 0, $out		# redundant in aligned case
++	vsel	v1, $outhead, v1, $outmask
++	stvx	v1, 0, $out
++
++	li	r10,`15+6*$SIZE_T`
++	li	r11,`31+6*$SIZE_T`
++	mtlr	r6
++	mtspr	256, r7			# restore vrsave
++	lvx	v20,r10,$sp
++	addi	r10,r10,32
++	lvx	v21,r11,$sp
++	addi	r11,r11,32
++	lvx	v22,r10,$sp
++	addi	r10,r10,32
++	lvx	v23,r11,$sp
++	addi	r11,r11,32
++	lvx	v24,r10,$sp
++	addi	r10,r10,32
++	lvx	v25,r11,$sp
++	addi	r11,r11,32
++	lvx	v26,r10,$sp
++	addi	r10,r10,32
++	lvx	v27,r11,$sp
++	addi	r11,r11,32
++	lvx	v28,r10,$sp
++	addi	r10,r10,32
++	lvx	v29,r11,$sp
++	addi	r11,r11,32
++	lvx	v30,r10,$sp
++	lvx	v31,r11,$sp
++	addi	$sp,$sp,$FRAME
++	blr
++	.long	0
++	.byte	0,12,0x04,1,0x80,0,3,0
++	.long	0
++.size	.vpaes_encrypt,.-.vpaes_encrypt
++
++.align	4
++_vpaes_decrypt_preheat:
++	mflr	r8
++	bl	Lconsts
++	mtlr	r8
++	li	r11, 0xc0		# Lk_inv
++	li	r10, 0xd0
++	li	r9,  0x160		# Ldipt
++	li	r8,  0x170
++	vxor	v7, v7, v7		# 0x00..00
++	vspltisb	v8,4		# 0x04..04
++	vspltisb	v9,0x0f		# 0x0f..0f
++	lvx	$invlo, r12, r11
++	li	r11, 0x180
++	lvx	$invhi, r12, r10
++	li	r10, 0x190
++	lvx	$iptlo, r12, r9
++	li	r9,  0x1a0
++	lvx	$ipthi, r12, r8
++	li	r8,  0x1b0
++	lvx	$sbou, r12, r11
++	li	r11, 0x1c0
++	lvx	$sbot, r12, r10
++	li	r10, 0x1d0
++	lvx	$sb9u, r12, r9
++	li	r9,  0x1e0
++	lvx	$sb9t, r12, r8
++	li	r8,  0x1f0
++	lvx	$sbdu, r12, r11
++	li	r11, 0x200
++	lvx	$sbdt, r12, r10
++	li	r10, 0x210
++	lvx	$sbbu, r12, r9
++	lvx	$sbbt, r12, r8
++	lvx	$sbeu, r12, r11
++	lvx	$sbet, r12, r10
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++
++##
++##  Decryption core
++##
++##  Same API as encryption core.
++##
++.align	4
++_vpaes_decrypt_core:
++	lwz	r8, 240($key)		# pull rounds
++	li	r9, 16
++	lvx	v5, 0, $key		# vmovdqu	(%r9),	%xmm4		# round0 key
++	li	r11, 0x30
++	lvx	v6, r9, $key
++	addi	r9, r9, 16
++	?vperm	v5, v5, v6, $keyperm	# align round key
++	vsrb	v1, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0
++	vperm	v0, $iptlo, $iptlo, v0	# vpshufb	%xmm1,	%xmm2,	%xmm2
++	vperm	v1, $ipthi, $ipthi, v1	# vpshufb	%xmm0,	%xmm1,	%xmm0
++	vxor	v0, v0, v5		# vpxor	%xmm4,	%xmm2,	%xmm2
++	vxor	v0, v0, v1		# vpxor	%xmm2,	%xmm0,	%xmm0
++	mtctr	r8
++	b	Ldec_entry
++
++.align 4
++Ldec_loop:
++#
++#  Inverse mix columns
++#
++	lvx	v0, r12, r11		# v5 and v0 are flipped
++					# vmovdqa	-0x20(%r10),%xmm4		# 4 : sb9u
++					# vmovdqa	-0x10(%r10),%xmm1		# 0 : sb9t
++	vperm	v4, $sb9u, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sb9u
++	subi	r11, r11, 16
++	vperm	v1, $sb9t, v7, v3	# vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sb9t
++	andi.	r11, r11, 0x30
++	vxor	v5, v5, v4		# vpxor		%xmm4,	%xmm0,	%xmm0
++					# vmovdqa	0x00(%r10),%xmm4		# 4 : sbdu
++	vxor	v5, v5, v1		# vpxor		%xmm1,	%xmm0,	%xmm0		# 0 = ch
++					# vmovdqa	0x10(%r10),%xmm1		# 0 : sbdt
++
++	vperm	v4, $sbdu, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbdu
++	vperm 	v5, v5, v7, v0		# vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
++	vperm	v1, $sbdt, v7, v3	# vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbdt
++	vxor	v5, v5, v4		# vpxor		%xmm4,	%xmm0,	%xmm0		# 4 = ch
++					# vmovdqa	0x20(%r10),	%xmm4		# 4 : sbbu
++	vxor	v5, v5, v1		# vpxor		%xmm1,	%xmm0,	%xmm0		# 0 = ch
++					# vmovdqa	0x30(%r10),	%xmm1		# 0 : sbbt
++
++	vperm	v4, $sbbu, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbbu
++	vperm	v5, v5, v7, v0		# vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
++	vperm	v1, $sbbt, v7, v3	# vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbbt
++	vxor	v5, v5, v4		# vpxor		%xmm4,	%xmm0,	%xmm0		# 4 = ch
++					# vmovdqa	0x40(%r10),	%xmm4		# 4 : sbeu
++	vxor	v5, v5, v1		# vpxor		%xmm1,	%xmm0,	%xmm0		# 0 = ch
++					# vmovdqa	0x50(%r10),	%xmm1		# 0 : sbet
++
++	vperm	v4, $sbeu, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbeu
++	vperm	v5, v5, v7, v0		# vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
++	vperm	v1, $sbet, v7, v3	# vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbet
++	vxor	v0, v5, v4		# vpxor		%xmm4,	%xmm0,	%xmm0		# 4 = ch
++	vxor	v0, v0, v1		# vpxor		%xmm1,	%xmm0,	%xmm0		# 0 = ch
++
++Ldec_entry:
++	# top of round
++	vsrb	v1, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0	# 1 = i
++	vperm	v2, $invhi, $invhi, v0	# vpshufb	%xmm1,	%xmm11,	%xmm2	# 2 = a/k
++	vxor	v0, v0, v1		# vpxor		%xmm0,	%xmm1,	%xmm1	# 0 = j
++	vperm	v3, $invlo, $invlo, v1	# vpshufb	%xmm0, 	%xmm10,	%xmm3	# 3 = 1/i
++	vperm	v4, $invlo, $invlo, v0	# vpshufb	%xmm1,	%xmm10,	%xmm4	# 4 = 1/j
++	vand	v0, v0, v9
++	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
++	vxor	v4, v4, v2		# vpxor		%xmm2, 	%xmm4,	%xmm4	# 4 = jak = 1/j + a/k
++	vperm	v2, $invlo, v7, v3	# vpshufb	%xmm3,	%xmm10,	%xmm2	# 2 = 1/iak
++	vmr	v5, v6
++	lvx	v6, r9, $key		# vmovdqu	(%r9),	%xmm0
++	vperm	v3, $invlo, v7, v4	# vpshufb	%xmm4,  %xmm10,	%xmm3	# 3 = 1/jak
++	addi	r9, r9, 16
++	vxor	v2, v2, v0		# vpxor		%xmm1,	%xmm2,	%xmm2	# 2 = io
++	?vperm	v5, v5, v6, $keyperm	# align round key
++	vxor	v3, v3, v1		# vpxor		%xmm0,  %xmm3,	%xmm3	# 3 = jo
++	bdnz	Ldec_loop
++
++	# middle of last round
++	addi	r10, r11, 0x80
++					# vmovdqa	0x60(%r10),	%xmm4	# 3 : sbou
++	vperm	v4, $sbou, v7, v2	# vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
++					# vmovdqa	0x70(%r10),	%xmm1	# 0 : sbot
++	lvx	v2, r12, r10		# vmovdqa	-0x160(%r11),	%xmm2	# .Lk_sr-.Lk_dsbd=-0x160
++	vperm	v1, $sbot, v7, v3	# vpshufb	%xmm3,	%xmm1,	%xmm1	# 0 = sb1t
++	vxor	v4, v4, v5		# vpxor		%xmm0,	%xmm4,	%xmm4	# 4 = sb1u + k
++	vxor	v0, v1, v4		# vpxor		%xmm4,	%xmm1,	%xmm0	# 0 = A
++	vperm	v0, v0, v7, v2		# vpshufb	%xmm2,	%xmm0,	%xmm0
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++
++.globl	.vpaes_decrypt
++.align	5
++.vpaes_decrypt:
++	$STU	$sp,-$FRAME($sp)
++	li	r10,`15+6*$SIZE_T`
++	li	r11,`31+6*$SIZE_T`
++	mflr	r6
++	mfspr	r7, 256			# save vrsave
++	stvx	v20,r10,$sp
++	addi	r10,r10,32
++	stvx	v21,r11,$sp
++	addi	r11,r11,32
++	stvx	v22,r10,$sp
++	addi	r10,r10,32
++	stvx	v23,r11,$sp
++	addi	r11,r11,32
++	stvx	v24,r10,$sp
++	addi	r10,r10,32
++	stvx	v25,r11,$sp
++	addi	r11,r11,32
++	stvx	v26,r10,$sp
++	addi	r10,r10,32
++	stvx	v27,r11,$sp
++	addi	r11,r11,32
++	stvx	v28,r10,$sp
++	addi	r10,r10,32
++	stvx	v29,r11,$sp
++	addi	r11,r11,32
++	stvx	v30,r10,$sp
++	stvx	v31,r11,$sp
++	stw	r7,`$FRAME-4`($sp)	# save vrsave
++	li	r0, -1
++	$PUSH	r6,`$FRAME+$LRSAVE`($sp)
++	mtspr	256, r0			# preserve all AltiVec registers
++
++	bl	_vpaes_decrypt_preheat
++
++	?lvsl	$inpperm, 0, $inp	# prepare for unaligned access
++	lvx	v0, 0, $inp
++	addi	$inp, $inp, 15		# 15 is not a typo
++	 ?lvsr	$outperm, 0, $out
++	?lvsl	$keyperm, 0, $key
++	 vnor	$outmask, v7, v7	# 0xff..ff
++	lvx	$inptail, 0, $inp	# redundant in aligned case
++	 ?vperm	$outmask, v7, $outmask, $outperm
++	 lvx	$outhead, 0, $out
++	?vperm	v0, v0, $inptail, $inpperm
++
++	bl	_vpaes_decrypt_core
++
++	vperm	v0, v0, v0, $outperm	# rotate right/left
++	vsel	v1, $outhead, v0, $outmask
++	vmr	$outhead, v0
++	stvx	v1, 0, $out
++	addi	$out, $out, 15		# 15 is not a typo
++	########
++
++	lvx	v1, 0, $out		# redundant in aligned case
++	vsel	v1, $outhead, v1, $outmask
++	stvx	v1, 0, $out
++
++	li	r10,`15+6*$SIZE_T`
++	li	r11,`31+6*$SIZE_T`
++	mtlr	r6
++	mtspr	256, r7			# restore vrsave
++	lvx	v20,r10,$sp
++	addi	r10,r10,32
++	lvx	v21,r11,$sp
++	addi	r11,r11,32
++	lvx	v22,r10,$sp
++	addi	r10,r10,32
++	lvx	v23,r11,$sp
++	addi	r11,r11,32
++	lvx	v24,r10,$sp
++	addi	r10,r10,32
++	lvx	v25,r11,$sp
++	addi	r11,r11,32
++	lvx	v26,r10,$sp
++	addi	r10,r10,32
++	lvx	v27,r11,$sp
++	addi	r11,r11,32
++	lvx	v28,r10,$sp
++	addi	r10,r10,32
++	lvx	v29,r11,$sp
++	addi	r11,r11,32
++	lvx	v30,r10,$sp
++	lvx	v31,r11,$sp
++	addi	$sp,$sp,$FRAME
++	blr
++	.long	0
++	.byte	0,12,0x04,1,0x80,0,3,0
++	.long	0
++.size	.vpaes_decrypt,.-.vpaes_decrypt
++
++.globl	.vpaes_cbc_encrypt
++.align	5
++.vpaes_cbc_encrypt:
++	${UCMP}i r5,16
++	bltlr-
++
++	$STU	$sp,-`($FRAME+2*$SIZE_T)`($sp)
++	mflr	r0
++	li	r10,`15+6*$SIZE_T`
++	li	r11,`31+6*$SIZE_T`
++	mfspr	r12, 256
++	stvx	v20,r10,$sp
++	addi	r10,r10,32
++	stvx	v21,r11,$sp
++	addi	r11,r11,32
++	stvx	v22,r10,$sp
++	addi	r10,r10,32
++	stvx	v23,r11,$sp
++	addi	r11,r11,32
++	stvx	v24,r10,$sp
++	addi	r10,r10,32
++	stvx	v25,r11,$sp
++	addi	r11,r11,32
++	stvx	v26,r10,$sp
++	addi	r10,r10,32
++	stvx	v27,r11,$sp
++	addi	r11,r11,32
++	stvx	v28,r10,$sp
++	addi	r10,r10,32
++	stvx	v29,r11,$sp
++	addi	r11,r11,32
++	stvx	v30,r10,$sp
++	stvx	v31,r11,$sp
++	stw	r12,`$FRAME-4`($sp)	# save vrsave
++	$PUSH	r30,`$FRAME+$SIZE_T*0`($sp)
++	$PUSH	r31,`$FRAME+$SIZE_T*1`($sp)
++	li	r9, -16
++	$PUSH	r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
++
++	and	r30, r5, r9		# copy length&-16
++	mr	r5, r6			# copy pointer to key
++	mr	r31, r7			# copy pointer to iv
++	blt	Lcbc_abort
++	cmpwi	r8, 0			# test direction
++	li	r6, -1
++	mr	r7, r12			# copy vrsave
++	mtspr	256, r6			# preserve all AltiVec registers
++
++	lvx	v24, 0, r31		# load [potentially unaligned] iv
++	li	r9, 15
++	?lvsl	$inpperm, 0, r31
++	lvx	v25, r9, r31
++	?vperm	v24, v24, v25, $inpperm
++
++	neg	r8, $inp		# prepare for unaligned access
++	 vxor	v7, v7, v7
++	?lvsl	$keyperm, 0, $key
++	 ?lvsr	$outperm, 0, $out
++	?lvsr	$inpperm, 0, r8		# -$inp
++	 vnor	$outmask, v7, v7	# 0xff..ff
++	lvx	$inptail, 0, $inp
++	 ?vperm	$outmask, v7, $outmask, $outperm
++	addi	$inp, $inp, 15		# 15 is not a typo
++	 lvx	$outhead, 0, $out
++
++	beq	Lcbc_decrypt
++
++	bl	_vpaes_encrypt_preheat
++	li	r0, 16
++
++Lcbc_enc_loop:
++	vmr	v0, $inptail
++	lvx	$inptail, 0, $inp
++	addi	$inp, $inp, 16
++	?vperm	v0, v0, $inptail, $inpperm
++	vxor	v0, v0, v24		# ^= iv
++
++	bl	_vpaes_encrypt_core
++
++	vmr	v24, v0			# put aside iv
++	sub.	r30, r30, r0		# len -= 16
++	vperm	v0, v0, v0, $outperm	# rotate right/left
++	vsel	v1, $outhead, v0, $outmask
++	vmr	$outhead, v0
++	stvx	v1, 0, $out
++	addi	$out, $out, 16
++	bne	Lcbc_enc_loop
++
++	b	Lcbc_done
++
++.align	5
++Lcbc_decrypt:
++	bl	_vpaes_decrypt_preheat
++	li	r0, 16
++
++Lcbc_dec_loop:
++	vmr	v0, $inptail
++	lvx	$inptail, 0, $inp
++	addi	$inp, $inp, 16
++	?vperm	v0, v0, $inptail, $inpperm
++	vmr	v25, v0			# put aside input
++
++	bl	_vpaes_decrypt_core
++
++	vxor	v0, v0, v24		# ^= iv
++	vmr	v24, v25
++	sub.	r30, r30, r0		# len -= 16
++	vperm	v0, v0, v0, $outperm	# rotate right/left
++	vsel	v1, $outhead, v0, $outmask
++	vmr	$outhead, v0
++	stvx	v1, 0, $out
++	addi	$out, $out, 16
++	bne	Lcbc_dec_loop
++
++Lcbc_done:
++	addi	$out, $out, -1
++	lvx	v1, 0, $out		# redundant in aligned case
++	vsel	v1, $outhead, v1, $outmask
++	stvx	v1, 0, $out
++
++	neg	r8, r31			# write [potentially unaligned] iv
++	?lvsl	$outperm, 0, r8
++	li	r6, 15
++	vnor	$outmask, v7, v7	# 0xff..ff
++	?vperm	$outmask, v7, $outmask, $outperm
++	lvx	$outhead, 0, r31
++	vperm	v24, v24, v24, $outperm	# rotate right/left
++	vsel	v0, $outhead, v24, $outmask
++	lvx	v1, r6, r31
++	stvx	v0, 0, r31
++	vsel	v1, v24, v1, $outmask
++	stvx	v1, r6, r31
++
++	mtspr	256, r7			# restore vrsave
++	li	r10,`15+6*$SIZE_T`
++	li	r11,`31+6*$SIZE_T`
++	lvx	v20,r10,$sp
++	addi	r10,r10,32
++	lvx	v21,r11,$sp
++	addi	r11,r11,32
++	lvx	v22,r10,$sp
++	addi	r10,r10,32
++	lvx	v23,r11,$sp
++	addi	r11,r11,32
++	lvx	v24,r10,$sp
++	addi	r10,r10,32
++	lvx	v25,r11,$sp
++	addi	r11,r11,32
++	lvx	v26,r10,$sp
++	addi	r10,r10,32
++	lvx	v27,r11,$sp
++	addi	r11,r11,32
++	lvx	v28,r10,$sp
++	addi	r10,r10,32
++	lvx	v29,r11,$sp
++	addi	r11,r11,32
++	lvx	v30,r10,$sp
++	lvx	v31,r11,$sp
++Lcbc_abort:
++	$POP	r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
++	$POP	r30,`$FRAME+$SIZE_T*0`($sp)
++	$POP	r31,`$FRAME+$SIZE_T*1`($sp)
++	mtlr	r0
++	addi	$sp,$sp,`$FRAME+$SIZE_T*2`
++	blr
++	.long	0
++	.byte	0,12,0x04,1,0x80,2,6,0
++	.long	0
++.size	.vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt
++___
++}
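For readers following the CBC driver above, here is a minimal C sketch of the chaining it implements; `block_encrypt` is a hypothetical stand-in for `_vpaes_encrypt_core`, and the unaligned-access machinery (`$inpperm`/`$outperm`/`$outmask`) is omitted:

```c
#include <stdint.h>
#include <string.h>

void block_encrypt(uint8_t b[16]);    /* hypothetical single-block core */

/* CBC encryption as chained in Lcbc_enc_loop: xor in the iv, encrypt,
 * and the ciphertext block becomes the next iv ("put aside iv").
 * Length is truncated to a multiple of 16, as in .vpaes_cbc_encrypt. */
void cbc_encrypt(uint8_t *buf, size_t len, uint8_t iv[16])
{
    for (size_t i = 0; i + 16 <= len; i += 16) {
        for (int j = 0; j < 16; j++)
            buf[i + j] ^= iv[j];       /* vxor v0, v0, v24 */
        block_encrypt(buf + i);        /* bl _vpaes_encrypt_core */
        memcpy(iv, buf + i, 16);       /* vmr v24, v0 */
    }
}
```

Decryption (Lcbc_dec_loop) mirrors this, except the input block is saved before the core call (`vmr v25, v0`) so the ciphertext, not the plaintext, feeds the next iv.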
++{
++my ($inp,$bits,$out)=map("r$_",(3..5));
++my $dir="cr1";
++my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24));
++
++$code.=<<___;
++########################################################
++##                                                    ##
++##                  AES key schedule                  ##
++##                                                    ##
++########################################################
++.align	4
++_vpaes_key_preheat:
++	mflr	r8
++	bl	Lconsts
++	mtlr	r8
++	li	r11, 0xc0		# Lk_inv
++	li	r10, 0xd0
++	li	r9,  0xe0		# L_ipt
++	li	r8,  0xf0
++
++	vspltisb	v8,4		# 0x04..04
++	vxor	v9,v9,v9		# 0x00..00
++	lvx	$invlo, r12, r11	# Lk_inv
++	li	r11, 0x120
++	lvx	$invhi, r12, r10
++	li	r10, 0x130
++	lvx	$iptlo, r12, r9		# Lk_ipt
++	li	r9, 0x220
++	lvx	$ipthi, r12, r8
++	li	r8, 0x230
++
++	lvx	v14, r12, r11		# Lk_sb1
++	li	r11, 0x240
++	lvx	v15, r12, r10
++	li	r10, 0x250
++
++	lvx	v16, r12, r9		# Lk_dksd
++	li	r9, 0x260
++	lvx	v17, r12, r8
++	li	r8, 0x270
++	lvx	v18, r12, r11		# Lk_dksb
++	li	r11, 0x280
++	lvx	v19, r12, r10
++	li	r10, 0x290
++	lvx	v20, r12, r9		# Lk_dkse
++	li	r9, 0x2a0
++	lvx	v21, r12, r8
++	li	r8, 0x2b0
++	lvx	v22, r12, r11		# Lk_dks9
++	lvx	v23, r12, r10
++
++	lvx	v24, r12, r9		# Lk_rcon
++	lvx	v25, 0, r12		# Lk_mc_forward[0]
++	lvx	v26, r12, r8		# Lks63
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++
++.align	4
++_vpaes_schedule_core:
++	mflr	r7
++
++	bl	_vpaes_key_preheat	# load the tables
++
++	#lvx	v0, 0, $inp		# vmovdqu	(%rdi),	%xmm0		# load key (unaligned)
++	neg	r8, $inp		# prepare for unaligned access
++	lvx	v0, 0, $inp
++	addi	$inp, $inp, 15		# 15 is not a typo
++	?lvsr	$inpperm, 0, r8		# -$inp
++	lvx	v6, 0, $inp		# v6 serves as inptail
++	addi	$inp, $inp, 8
++	?vperm	v0, v0, v6, $inpperm
++
++	# input transform
++	vmr	v3, v0			# vmovdqa	%xmm0,	%xmm3
++	bl	_vpaes_schedule_transform
++	vmr	v7, v0			# vmovdqa	%xmm0,	%xmm7
++
++	bne	$dir, Lschedule_am_decrypting
++
++	# encrypting, output zeroth round key after transform
++	li	r8, 0x30		# mov	\$0x30,%r8d
++	addi	r10, r12, 0x80		# lea	.Lk_sr(%rip),%r10
++
++	?lvsr	$outperm, 0, $out	# prepare for unaligned access
++	vnor	$outmask, v9, v9	# 0xff..ff
++	lvx	$outhead, 0, $out
++	?vperm	$outmask, v9, $outmask, $outperm
++
++	#stvx	v0, 0, $out		# vmovdqu	%xmm0,	(%rdx)
++	vperm	v1, v0, v0, $outperm	# rotate right/left
++	vsel	v2, $outhead, v1, $outmask
++	vmr	$outhead, v1
++	stvx	v2, 0, $out
++	b	Lschedule_go
++
++Lschedule_am_decrypting:
++	srwi	r8, $bits, 1		# shr	\$1,%r8d
++	andi.	r8, r8, 32		# and	\$32,%r8d
++	xori	r8, r8, 32		# xor	\$32,%r8d	# nbits==192?0:32
++	addi	r10, r12, 0x80		# lea	.Lk_sr(%rip),%r10
++	# decrypting, output zeroth round key after shiftrows
++	lvx	v1, r8, r10		# vmovdqa	(%r8,%r10),	%xmm1
++	vperm	v4, v3, v3, v1		# vpshufb	%xmm1,	%xmm3,	%xmm3
++
++	neg	r0, $out		# prepare for unaligned access
++	?lvsl	$outperm, 0, r0
++	addi	$out, $out, 15		# 15 is not a typo
++	vnor	$outmask, v9, v9	# 0xff..ff
++	lvx	$outhead, 0, $out
++	?vperm	$outmask, $outmask, v9, $outperm
++
++	#stvx	v4, 0, $out		# vmovdqu	%xmm3,	(%rdx)
++	vperm	v4, v4, v4, $outperm	# rotate right/left
++	vsel	v2, $outhead, v4, $outmask
++	vmr	$outhead, v4
++	stvx	v2, 0, $out
++	xori	r8, r8, 0x30		# xor	\$0x30, %r8
++
++Lschedule_go:
++	cmplwi	$bits, 192		# cmp	\$192,	%esi
++	bgt	Lschedule_256
++	beq	Lschedule_192
++	# 128: fall through
++
++##
++##  .schedule_128
++##
++##  128-bit specific part of key schedule.
++##
++##  This schedule is really simple, because all its parts
++##  are accomplished by the subroutines.
++##
++Lschedule_128:
++	li	r0, 10			# mov	\$10, %esi
++	mtctr	r0
++
++Loop_schedule_128:
++	bl 	_vpaes_schedule_round
++	bdz 	Lschedule_mangle_last	# dec	%esi
++	bl	_vpaes_schedule_mangle	# write output
++	b 	Loop_schedule_128
++
++##
++##  .aes_schedule_192
++##
++##  192-bit specific part of key schedule.
++##
++##  The main body of this schedule is the same as the 128-bit
++##  schedule, but with more smearing.  The long, high side is
++##  stored in %xmm7 as before, and the short, low side is in
++##  the high bits of %xmm6.
++##
++##  This schedule is somewhat nastier, however, because each
++##  round produces 192 bits of key material, or 1.5 round keys.
++##  Therefore, on each cycle we do 2 rounds and produce 3 round
++##  keys.
++##
++.align	4
++Lschedule_192:
++	li	r0, 4			# mov	\$4,	%esi
++	lvx	v0, 0, $inp
++	?vperm	v0, v6, v0, $inpperm
++	?vsldoi	v0, v3, v0, 8		# vmovdqu	8(%rdi),%xmm0		# load key part 2 (very unaligned)
++	bl	_vpaes_schedule_transform	# input transform
++	?vsldoi	v6, v0, v9, 8
++	?vsldoi	v6, v9, v6, 8		# clobber "low" side with zeros
++	mtctr	r0
++
++Loop_schedule_192:
++	bl	_vpaes_schedule_round
++	?vsldoi	v0, v6, v0, 8		# vpalignr	\$8,%xmm6,%xmm0,%xmm0
++	bl	_vpaes_schedule_mangle	# save key n
++	bl	_vpaes_schedule_192_smear
++	bl	_vpaes_schedule_mangle	# save key n+1
++	bl	_vpaes_schedule_round
++	bdz 	Lschedule_mangle_last	# dec	%esi
++	bl	_vpaes_schedule_mangle	# save key n+2
++	bl	_vpaes_schedule_192_smear
++	b	Loop_schedule_192
++
++##
++##  .aes_schedule_256
++##
++##  256-bit specific part of key schedule.
++##
++##  The structure here is very similar to the 128-bit
++##  schedule, but with an additional "low side" in
++##  %xmm6.  The low side's rounds are the same as the
++##  high side's, except no rcon and no rotation.
++##
++.align	4
++Lschedule_256:
++	li	r0, 7			# mov	\$7, %esi
++	addi	$inp, $inp, 8
++	lvx	v0, 0, $inp		# vmovdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
++	?vperm	v0, v6, v0, $inpperm
++	bl	_vpaes_schedule_transform	# input transform
++	mtctr	r0
++
++Loop_schedule_256:
++	bl	_vpaes_schedule_mangle	# output low result
++	vmr	v6, v0			# vmovdqa	%xmm0,	%xmm6		# save cur_lo in xmm6
++
++	# high round
++	bl	_vpaes_schedule_round
++	bdz 	Lschedule_mangle_last	# dec	%esi
++	bl	_vpaes_schedule_mangle	
++
++	# low round. swap xmm7 and xmm6
++	?vspltw	v0, v0, 3		# vpshufd	\$0xFF,	%xmm0,	%xmm0
++	vmr	v5, v7			# vmovdqa	%xmm7,	%xmm5
++	vmr	v7, v6			# vmovdqa	%xmm6,	%xmm7
++	bl	_vpaes_schedule_low_round
++	vmr	v7, v5			# vmovdqa	%xmm5,	%xmm7
++	
++	b	Loop_schedule_256
++##
++##  .aes_schedule_mangle_last
++##
++##  Mangler for last round of key schedule
++##  Mangles %xmm0
++##    when encrypting, outputs out(%xmm0) ^ 63
++##    when decrypting, outputs unskew(%xmm0)
++##
++##  Always called right before return... jumps to cleanup and exits
++##
++.align	4
++Lschedule_mangle_last:
++	# schedule last round key from xmm0
++	li	r11, 0x2e0		# lea	.Lk_deskew(%rip),%r11
++	li	r9,  0x2f0
++	bne	$dir, Lschedule_mangle_last_dec
++
++	# encrypting
++	lvx	v1, r8, r10		# vmovdqa	(%r8,%r10),%xmm1
++	li	r11, 0x2c0		# lea		.Lk_opt(%rip),	%r11	# prepare to output transform
++	li	r9,  0x2d0		# prepare to output transform
++	vperm	v0, v0, v0, v1		# vpshufb	%xmm1,	%xmm0,	%xmm0	# output permute
++
++	lvx	$iptlo, r11, r12	# reload $ipt
++	lvx	$ipthi, r9, r12
++	addi	$out, $out, 16		# add	\$16,	%rdx
++	vxor	v0, v0, v26		# vpxor		.Lk_s63(%rip),	%xmm0,	%xmm0
++	bl	_vpaes_schedule_transform	# output transform
++
++	#stvx	v0, r0, $out		# vmovdqu	%xmm0,	(%rdx)		# save last key
++	vperm	v0, v0, v0, $outperm	# rotate right/left
++	vsel	v2, $outhead, v0, $outmask
++	vmr	$outhead, v0
++	stvx	v2, 0, $out
++
++	addi	$out, $out, 15		# 15 is not a typo
++	lvx	v1, 0, $out		# redundant in aligned case
++	vsel	v1, $outhead, v1, $outmask
++	stvx	v1, 0, $out
++	b	Lschedule_mangle_done
++
++.align	4
++Lschedule_mangle_last_dec:
++	lvx	$iptlo, r11, r12	# reload $ipt
++	lvx	$ipthi, r9,  r12
++	addi	$out, $out, -16		# add	\$-16,	%rdx 
++	vxor	v0, v0, v26		# vpxor	.Lk_s63(%rip),	%xmm0,	%xmm0
++	bl	_vpaes_schedule_transform	# output transform
++
++	#stvx	v0, r0, $out		# vmovdqu	%xmm0,	(%rdx)		# save last key
++	vperm	v0, v0, v0, $outperm	# rotate right/left
++	vsel	v2, $outhead, v0, $outmask
++	vmr	$outhead, v0
++	stvx	v2, 0, $out
++
++	addi	$out, $out, -15		# -15 is not a typo
++	lvx	v1, 0, $out		# redundant in aligned case
++	vsel	v1, $outhead, v1, $outmask
++	stvx	v1, 0, $out
++
++Lschedule_mangle_done:
++	mtlr	r7
++	# cleanup
++	vxor	v0, v0, v0		# vpxor		%xmm0,	%xmm0,	%xmm0
++	vxor	v1, v1, v1		# vpxor		%xmm1,	%xmm1,	%xmm1
++	vxor	v2, v2, v2		# vpxor		%xmm2,	%xmm2,	%xmm2
++	vxor	v3, v3, v3		# vpxor		%xmm3,	%xmm3,	%xmm3
++	vxor	v4, v4, v4		# vpxor		%xmm4,	%xmm4,	%xmm4
++	vxor	v5, v5, v5		# vpxor		%xmm5,	%xmm5,	%xmm5
++	vxor	v6, v6, v6		# vpxor		%xmm6,	%xmm6,	%xmm6
++	vxor	v7, v7, v7		# vpxor		%xmm7,	%xmm7,	%xmm7
++
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++
++##
++##  .aes_schedule_192_smear
++##
++##  Smear the short, low side in the 192-bit key schedule.
++##
++##  Inputs:
++##    %xmm7: high side, b  a  x  y
++##    %xmm6:  low side, d  c  0  0
++##    %xmm13: 0
++##
++##  Outputs:
++##    %xmm6: b+c+d  b+c  0  0
++##    %xmm0: b+c+d  b+c  b  a
++##
++.align	4
++_vpaes_schedule_192_smear:
++	?vspltw	v0, v7, 3
++	?vsldoi	v1, v9, v6, 12		# vpshufd	\$0x80,	%xmm6,	%xmm1	# d c 0 0 -> c 0 0 0
++	?vsldoi	v0, v7, v0, 8		# vpshufd	\$0xFE,	%xmm7,	%xmm0	# b a _ _ -> b b b a
++	vxor	v6, v6, v1		# vpxor		%xmm1,	%xmm6,	%xmm6	# -> c+d c 0 0
++	vxor	v6, v6, v0		# vpxor		%xmm0,	%xmm6,	%xmm6	# -> b+c+d b+c b a
++	vmr	v0, v6
++	?vsldoi	v6, v6, v9, 8
++	?vsldoi	v6, v9, v6, 8		# clobber low side with zeros
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
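As a cross-check of the Inputs/Outputs table above, a lane-level C model of the smear (a sketch only; index 3 is the leftmost dword of the x86-style comments, and "+" means xor):

```c
#include <stdint.h>

/* x7 = high side (b a x y), x6 = low side (d c 0 0), x0 = output. */
static void smear192(uint32_t x7[4], uint32_t x6[4], uint32_t x0[4])
{
    uint32_t t1[4] = { 0, 0, 0, x6[2] };              /* d c 0 0 -> c 0 0 0 */
    uint32_t t0[4] = { x7[2], x7[3], x7[3], x7[3] };  /* b a _ _ -> b b b a */
    for (int i = 0; i < 4; i++) x6[i] ^= t1[i];       /* -> c+d c 0 0 */
    for (int i = 0; i < 4; i++) x6[i] ^= t0[i];       /* -> b+c+d b+c b a */
    for (int i = 0; i < 4; i++) x0[i] = x6[i];        /* vmr v0, v6 */
    x6[0] = x6[1] = 0;                 /* clobber low side with zeros */
}
```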
++
++##
++##  .aes_schedule_round
++##
++##  Runs one main round of the key schedule on %xmm0, %xmm7
++##
++##  Specifically, runs subbytes on the high dword of %xmm0
++##  then rotates it by one byte and xors into the low dword of
++##  %xmm7.
++##
++##  Adds rcon from low byte of %xmm8, then rotates %xmm8 for
++##  next rcon.
++##
++##  Smears the dwords of %xmm7 by xoring the low into the
++##  second low, result into third, result into highest.
++##
++##  Returns results in %xmm7 = %xmm0.
++##  Clobbers %xmm1-%xmm4, %r11.
++##
++.align	4
++_vpaes_schedule_round:
++	# extract rcon from xmm8
++	#vxor	v4, v4, v4		# vpxor		%xmm4,	%xmm4,	%xmm4
++	?vsldoi	v1, $rcon, v9, 15	# vpalignr	\$15,	%xmm8,	%xmm4,	%xmm1
++	?vsldoi	$rcon, $rcon, $rcon, 15	# vpalignr	\$15,	%xmm8,	%xmm8,	%xmm8
++	vxor	v7, v7, v1		# vpxor		%xmm1,	%xmm7,	%xmm7
++
++	# rotate
++	?vspltw	v0, v0, 3		# vpshufd	\$0xFF,	%xmm0,	%xmm0
++	?vsldoi	v0, v0, v0, 1		# vpalignr	\$1,	%xmm0,	%xmm0,	%xmm0
++
++	# fall through...
++
++	# low round: same as high round, but no rotation and no rcon.
++_vpaes_schedule_low_round:
++	# smear xmm7
++	?vsldoi	v1, v9, v7, 12		# vpslldq	\$4,	%xmm7,	%xmm1
++	vxor	v7, v7, v1		# vpxor		%xmm1,	%xmm7,	%xmm7
++	vspltisb	v1, 0x0f	# 0x0f..0f
++	?vsldoi	v4, v9, v7, 8		# vpslldq	\$8,	%xmm7,	%xmm4
++
++	# subbytes
++	vand	v1, v1, v0		# vpand		%xmm9,	%xmm0,	%xmm1		# 0 = k
++	vsrb	v0, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0		# 1 = i
++	 vxor	v7, v7, v4		# vpxor		%xmm4,	%xmm7,	%xmm7
++	vperm	v2, $invhi, v9, v1	# vpshufb	%xmm1,	%xmm11,	%xmm2		# 2 = a/k
++	vxor	v1, v1, v0		# vpxor		%xmm0,	%xmm1,	%xmm1		# 0 = j
++	vperm	v3, $invlo, v9, v0	# vpshufb	%xmm0, 	%xmm10,	%xmm3		# 3 = 1/i
++	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3		# 3 = iak = 1/i + a/k
++	vperm	v4, $invlo, v9, v1	# vpshufb	%xmm1,	%xmm10,	%xmm4		# 4 = 1/j
++	 vxor	v7, v7, v26		# vpxor		.Lk_s63(%rip),	%xmm7,	%xmm7
++	vperm	v3, $invlo, v9, v3	# vpshufb	%xmm3,	%xmm10,	%xmm3		# 2 = 1/iak
++	vxor	v4, v4, v2		# vpxor		%xmm2,	%xmm4,	%xmm4		# 4 = jak = 1/j + a/k
++	vperm	v2, $invlo, v9, v4	# vpshufb	%xmm4,	%xmm10,	%xmm2		# 3 = 1/jak
++	vxor	v3, v3, v1		# vpxor		%xmm1,	%xmm3,	%xmm3		# 2 = io
++	vxor	v2, v2, v0		# vpxor		%xmm0,	%xmm2,	%xmm2		# 3 = jo
++	vperm	v4, v15, v9, v3		# vpshufb	%xmm3,	%xmm13,	%xmm4		# 4 = sbou
++	vperm	v1, v14, v9, v2		# vpshufb	%xmm2,	%xmm12,	%xmm1		# 0 = sb1t
++	vxor	v1, v1, v4		# vpxor		%xmm4,	%xmm1,	%xmm1		# 0 = sbox output
++
++	# add in smeared stuff
++	vxor	v0, v1, v7		# vpxor		%xmm7,	%xmm1,	%xmm0
++	vxor	v7, v1, v7		# vmovdqa	%xmm0,	%xmm7
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
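Stripped of the vector tricks, the round just described is the textbook AES key-expansion step. A scalar sketch for orientation (it assumes a standard S-box table `aes_sbox`; the real code additionally keeps everything in vpaes' transformed basis and mixes in `.Lk_s63`):

```c
#include <stdint.h>

extern const uint8_t aes_sbox[256];       /* assumed: the AES S-box */

/* Words packed big-endian: byte 0 in the top 8 bits. */
static uint32_t subword(uint32_t w)
{
    return (uint32_t)aes_sbox[w >> 24]          << 24 |
           (uint32_t)aes_sbox[(w >> 16) & 0xff] << 16 |
           (uint32_t)aes_sbox[(w >>  8) & 0xff] <<  8 |
                     aes_sbox[w & 0xff];
}

/* One AES-128 expansion round: prev[] is the previous round key,
 * rcon sits in the top byte; next[] receives the new round key. */
static void schedule_round(const uint32_t prev[4], uint32_t rcon,
                           uint32_t next[4])
{
    uint32_t t = subword(prev[3]);     /* subbytes on the high dword */
    t = (t << 8) | (t >> 24);          /* rotate by one byte */
    next[0] = prev[0] ^ t ^ rcon;      /* add rcon */
    next[1] = prev[1] ^ next[0];       /* smear the dwords: low into  */
    next[2] = prev[2] ^ next[1];       /* second, result into third,  */
    next[3] = prev[3] ^ next[2];       /* result into highest         */
}
```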
++
++##
++##  .aes_schedule_transform
++##
++##  Linear-transform %xmm0 according to tables at (%r11)
++##
++##  Requires that %xmm9 = 0x0F0F... as in preheat
++##  Output in %xmm0
++##  Clobbers %xmm2
++##
++.align	4
++_vpaes_schedule_transform:
++	#vand	v1, v0, v9		# vpand		%xmm9,	%xmm0,	%xmm1
++	vsrb	v2, v0, v8		# vpsrlb	\$4,	%xmm0,	%xmm0
++					# vmovdqa	(%r11),	%xmm2 	# lo
++	vperm	v0, $iptlo, $iptlo, v0	# vpshufb	%xmm1,	%xmm2,	%xmm2
++					# vmovdqa	16(%r11),	%xmm1 # hi
++	vperm	v2, $ipthi, $ipthi, v2	# vpshufb	%xmm0,	%xmm1,	%xmm0
++	vxor	v0, v0, v2		# vpxor		%xmm2,	%xmm0,	%xmm0
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
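Bytewise, the transform is a GF(2^8)-linear map evaluated as two 16-entry nibble lookups xored together, which is exactly what the `vperm` pair computes. A per-byte sketch (the table contents are whatever `.Lk_ipt` holds; they are not reproduced here):

```c
#include <stdint.h>

/* lo/hi correspond to the 16-byte tables loaded into $iptlo/$ipthi. */
static uint8_t xform_byte(uint8_t x, const uint8_t lo[16],
                          const uint8_t hi[16])
{
    return lo[x & 0x0f] ^ hi[x >> 4];
}
```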
++
++##
++##  .aes_schedule_mangle
++##
++##  Mangle xmm0 from (basis-transformed) standard version
++##  to our version.
++##
++##  On encrypt,
++##    xor with 0x63
++##    multiply by circulant 0,1,1,1
++##    apply shiftrows transform
++##
++##  On decrypt,
++##    xor with 0x63
++##    multiply by "inverse mixcolumns" circulant E,B,D,9
++##    deskew
++##    apply shiftrows transform
++##
++##
++##  Writes out to (%rdx), and increments or decrements it
++##  Keeps track of round number mod 4 in %r8
++##  Preserves xmm0
++##  Clobbers xmm1-xmm5
++##
++.align	4
++_vpaes_schedule_mangle:
++	#vmr	v4, v0			# vmovdqa	%xmm0,	%xmm4	# save xmm0 for later
++					# vmovdqa	.Lk_mc_forward(%rip),%xmm5
++	bne	$dir, Lschedule_mangle_dec
++
++	# encrypting
++	vxor	v4, v0, v26		# vpxor	.Lk_s63(%rip),	%xmm0,	%xmm4
++	addi	$out, $out, 16		# add	\$16,	%rdx
++	vperm	v4, v4, v4, v25		# vpshufb	%xmm5,	%xmm4,	%xmm4
++	vperm	v1, v4, v4, v25		# vpshufb	%xmm5,	%xmm4,	%xmm1
++	vperm	v3, v1, v1, v25		# vpshufb	%xmm5,	%xmm1,	%xmm3
++	vxor	v4, v4, v1		# vpxor		%xmm1,	%xmm4,	%xmm4
++	lvx	v1, r8, r10		# vmovdqa	(%r8,%r10),	%xmm1
++	vxor	v3, v3, v4		# vpxor		%xmm4,	%xmm3,	%xmm3
++
++	vperm	v3, v3, v3, v1		# vpshufb	%xmm1,	%xmm3,	%xmm3
++	addi	r8, r8, -16		# add	\$-16,	%r8
++	andi.	r8, r8, 0x30		# and	\$0x30,	%r8
++
++	#stvx	v3, 0, $out		# vmovdqu	%xmm3,	(%rdx)
++	vperm	v1, v3, v3, $outperm	# rotate right/left
++	vsel	v2, $outhead, v1, $outmask
++	vmr	$outhead, v1
++	stvx	v2, 0, $out
++	blr
++
++.align	4
++Lschedule_mangle_dec:
++	# inverse mix columns
++					# lea	.Lk_dksd(%rip),%r11
++	vsrb	v1, v0, v8		# vpsrlb	\$4,	%xmm4,	%xmm1	# 1 = hi
++	#and	v4, v0, v9		# vpand		%xmm9,	%xmm4,	%xmm4	# 4 = lo
++
++					# vmovdqa	0x00(%r11),	%xmm2
++	vperm	v2, v16, v16, v0	# vpshufb	%xmm4,	%xmm2,	%xmm2
++					# vmovdqa	0x10(%r11),	%xmm3
++	vperm	v3, v17, v17, v1	# vpshufb	%xmm1,	%xmm3,	%xmm3
++	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3
++	vperm	v3, v3, v9, v25		# vpshufb	%xmm5,	%xmm3,	%xmm3
++
++					# vmovdqa	0x20(%r11),	%xmm2
++	vperm	v2, v18, v18, v0	# vpshufb	%xmm4,	%xmm2,	%xmm2
++	vxor	v2, v2, v3		# vpxor		%xmm3,	%xmm2,	%xmm2
++					# vmovdqa	0x30(%r11),	%xmm3
++	vperm	v3, v19, v19, v1	# vpshufb	%xmm1,	%xmm3,	%xmm3
++	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3
++	vperm	v3, v3, v9, v25		# vpshufb	%xmm5,	%xmm3,	%xmm3
++
++					# vmovdqa	0x40(%r11),	%xmm2
++	vperm	v2, v20, v20, v0	# vpshufb	%xmm4,	%xmm2,	%xmm2
++	vxor	v2, v2, v3		# vpxor		%xmm3,	%xmm2,	%xmm2
++					# vmovdqa	0x50(%r11),	%xmm3
++	vperm	v3, v21, v21, v1	# vpshufb	%xmm1,	%xmm3,	%xmm3
++	vxor	v3, v3, v2		# vpxor		%xmm2,	%xmm3,	%xmm3
++
++					# vmovdqa	0x60(%r11),	%xmm2
++	vperm	v2, v22, v22, v0	# vpshufb	%xmm4,	%xmm2,	%xmm2
++	vperm	v3, v3, v9, v25		# vpshufb	%xmm5,	%xmm3,	%xmm3
++					# vmovdqa	0x70(%r11),	%xmm4
++	vperm	v4, v23, v23, v1	# vpshufb	%xmm1,	%xmm4,	%xmm4
++	lvx	v1, r8, r10		# vmovdqa	(%r8,%r10),	%xmm1
++	vxor	v2, v2, v3		# vpxor		%xmm3,	%xmm2,	%xmm2
++	vxor	v3, v4, v2		# vpxor		%xmm2,	%xmm4,	%xmm3
++
++	addi	$out, $out, -16		# add	\$-16,	%rdx
++
++	vperm	v3, v3, v3, v1		# vpshufb	%xmm1,	%xmm3,	%xmm3
++	addi	r8, r8, -16		# add	\$-16,	%r8
++	andi.	r8, r8, 0x30		# and	\$0x30,	%r8
++
++	#stvx	v3, 0, $out		# vmovdqu	%xmm3,	(%rdx)
++	vperm	v1, v3, v3, $outperm	# rotate right/left
++	vsel	v2, $outhead, v1, $outmask
++	vmr	$outhead, v1
++	stvx	v2, 0, $out
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
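On the encrypt path, "multiply by circulant 0,1,1,1" reduces to xoring the three non-trivial byte rotations of each column; that is what the three chained `vperm`s by `Lk_mc_forward` and the accumulating `vxor`s compute. A per-column sketch (rotation direction depends on byte packing; one packing shown):

```c
#include <stdint.h>

static uint32_t circulant_0111(uint32_t col)
{
    uint32_t r1 = (col <<  8) | (col >> 24);   /* rotate by 1 byte  */
    uint32_t r2 = (col << 16) | (col >> 16);   /* rotate by 2 bytes */
    uint32_t r3 = (col << 24) | (col >>  8);   /* rotate by 3 bytes */
    return r1 ^ r2 ^ r3;                       /* circulant (0,1,1,1) */
}
```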
++
++.globl	.vpaes_set_encrypt_key
++.align	5
++.vpaes_set_encrypt_key:
++	$STU	$sp,-$FRAME($sp)
++	li	r10,`15+6*$SIZE_T`
++	li	r11,`31+6*$SIZE_T`
++	mflr	r0
++	mfspr	r6, 256			# save vrsave
++	stvx	v20,r10,$sp
++	addi	r10,r10,32
++	stvx	v21,r11,$sp
++	addi	r11,r11,32
++	stvx	v22,r10,$sp
++	addi	r10,r10,32
++	stvx	v23,r11,$sp
++	addi	r11,r11,32
++	stvx	v24,r10,$sp
++	addi	r10,r10,32
++	stvx	v25,r11,$sp
++	addi	r11,r11,32
++	stvx	v26,r10,$sp
++	addi	r10,r10,32
++	stvx	v27,r11,$sp
++	addi	r11,r11,32
++	stvx	v28,r10,$sp
++	addi	r10,r10,32
++	stvx	v29,r11,$sp
++	addi	r11,r11,32
++	stvx	v30,r10,$sp
++	stvx	v31,r11,$sp
++	stw	r6,`$FRAME-4`($sp)	# save vrsave
++	li	r7, -1
++	$PUSH	r0, `$FRAME+$LRSAVE`($sp)
++	mtspr	256, r7			# preserve all AltiVec registers
++
++	srwi	r9, $bits, 5		# shr	\$5,%eax
++	addi	r9, r9, 6		# add	\$5,%eax
++	stw	r9, 240($out)		# mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
++
++	cmplw	$dir, $bits, $bits	# set encrypt direction
++	li	r8, 0x30		# mov	\$0x30,%r8d
++	bl	_vpaes_schedule_core
++
++	$POP	r0, `$FRAME+$LRSAVE`($sp)
++	li	r10,`15+6*$SIZE_T`
++	li	r11,`31+6*$SIZE_T`
++	mtspr	256, r6			# restore vrsave
++	mtlr	r0
++	xor	r3, r3, r3
++	lvx	v20,r10,$sp
++	addi	r10,r10,32
++	lvx	v21,r11,$sp
++	addi	r11,r11,32
++	lvx	v22,r10,$sp
++	addi	r10,r10,32
++	lvx	v23,r11,$sp
++	addi	r11,r11,32
++	lvx	v24,r10,$sp
++	addi	r10,r10,32
++	lvx	v25,r11,$sp
++	addi	r11,r11,32
++	lvx	v26,r10,$sp
++	addi	r10,r10,32
++	lvx	v27,r11,$sp
++	addi	r11,r11,32
++	lvx	v28,r10,$sp
++	addi	r10,r10,32
++	lvx	v29,r11,$sp
++	addi	r11,r11,32
++	lvx	v30,r10,$sp
++	lvx	v31,r11,$sp
++	addi	$sp,$sp,$FRAME
++	blr
++	.long	0
++	.byte	0,12,0x04,1,0x80,0,3,0
++	.long	0
++.size	.vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key
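The `srwi`/`addi` pair above stores the round count at offset 240 as `(nbits >> 5) + 6` (the inherited x86 comment says `+5`, but the PowerPC instruction adds 6), which yields the standard counts:

```c
#include <stdio.h>

int main(void)
{
    for (int bits = 128; bits <= 256; bits += 64)
        printf("AES-%d -> %d rounds\n", bits, (bits >> 5) + 6);
    return 0;   /* AES-128 -> 10, AES-192 -> 12, AES-256 -> 14 */
}
```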
++
++.globl	.vpaes_set_decrypt_key
++.align	4
++.vpaes_set_decrypt_key:
++	$STU	$sp,-$FRAME($sp)
++	li	r10,`15+6*$SIZE_T`
++	li	r11,`31+6*$SIZE_T`
++	mflr	r0
++	mfspr	r6, 256			# save vrsave
++	stvx	v20,r10,$sp
++	addi	r10,r10,32
++	stvx	v21,r11,$sp
++	addi	r11,r11,32
++	stvx	v22,r10,$sp
++	addi	r10,r10,32
++	stvx	v23,r11,$sp
++	addi	r11,r11,32
++	stvx	v24,r10,$sp
++	addi	r10,r10,32
++	stvx	v25,r11,$sp
++	addi	r11,r11,32
++	stvx	v26,r10,$sp
++	addi	r10,r10,32
++	stvx	v27,r11,$sp
++	addi	r11,r11,32
++	stvx	v28,r10,$sp
++	addi	r10,r10,32
++	stvx	v29,r11,$sp
++	addi	r11,r11,32
++	stvx	v30,r10,$sp
++	stvx	v31,r11,$sp
++	stw	r6,`$FRAME-4`($sp)	# save vrsave
++	li	r7, -1
++	$PUSH	r0, `$FRAME+$LRSAVE`($sp)
++	mtspr	256, r7			# preserve all AltiVec registers
++
++	srwi	r9, $bits, 5		# shr	\$5,%eax
++	addi	r9, r9, 6		# add	\$5,%eax
++	stw	r9, 240($out)		# mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
++
++	slwi	r9, r9, 4		# shl	\$4,%eax
++	add	$out, $out, r9		# lea	(%rdx,%rax),%rdx
++
++	cmplwi	$dir, $bits, 0		# set decrypt direction
++	srwi	r8, $bits, 1		# shr	\$1,%r8d
++	andi.	r8, r8, 32		# and	\$32,%r8d
++	xori	r8, r8, 32		# xor	\$32,%r8d	# nbits==192?0:32
++	bl	_vpaes_schedule_core
++
++	$POP	r0,  `$FRAME+$LRSAVE`($sp)
++	li	r10,`15+6*$SIZE_T`
++	li	r11,`31+6*$SIZE_T`
++	mtspr	256, r6			# restore vrsave
++	mtlr	r0
++	xor	r3, r3, r3
++	lvx	v20,r10,$sp
++	addi	r10,r10,32
++	lvx	v21,r11,$sp
++	addi	r11,r11,32
++	lvx	v22,r10,$sp
++	addi	r10,r10,32
++	lvx	v23,r11,$sp
++	addi	r11,r11,32
++	lvx	v24,r10,$sp
++	addi	r10,r10,32
++	lvx	v25,r11,$sp
++	addi	r11,r11,32
++	lvx	v26,r10,$sp
++	addi	r10,r10,32
++	lvx	v27,r11,$sp
++	addi	r11,r11,32
++	lvx	v28,r10,$sp
++	addi	r10,r10,32
++	lvx	v29,r11,$sp
++	addi	r11,r11,32
++	lvx	v30,r10,$sp
++	lvx	v31,r11,$sp
++	addi	$sp,$sp,$FRAME
++	blr
++	.long	0
++	.byte	0,12,0x04,1,0x80,0,3,0
++	.long	0
++.size	.vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
++___
++}
++
++my $consts=1;
++foreach  (split("\n",$code)) {
++	s/\`([^\`]*)\`/eval $1/geo;
++
++	# constants table endian-specific conversion
++	if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) {
++	    my $conv=$2;
++	    my @bytes=();
++
++	    # convert to endian-agnostic format
++	    foreach (split(/,\s+/,$1)) {
++		my $l = /^0/?oct:int;
++		push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
++	    }
++
++	    # little-endian conversion
++	    if ($flavour =~ /le$/o) {
++		SWITCH: for($conv)  {
++		    /\?inv/ && do   { @bytes=map($_^0xf,@bytes); last; };
++		    /\?rev/ && do   { @bytes=reverse(@bytes);    last; }; 
++		}
++	    }
++
++	    #emit
++	    print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
++	    next;
++	}
++	$consts=0 if (m/Lconsts:/o);	# end of table
++
++	# instructions prefixed with '?' are endian-specific and need
++	# to be adjusted accordingly...
++	if ($flavour =~ /le$/o) {	# little-endian
++	    s/\?lvsr/lvsl/o or
++	    s/\?lvsl/lvsr/o or
++	    s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
++	    s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
++	    s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
++	} else {			# big-endian
++	    s/\?([a-z]+)/$1/o;
++	}
++
++	print $_,"\n";
++}
++
++close STDOUT;
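The emitter loop above splits every `.long` constant into four discrete bytes, so the tables assemble identically on either endianness, then applies the per-constant `?rev`/`?inv` adjustment for little-endian flavours. A C model of the same conversion, mirroring the Perl (a sketch, not part of the build):

```c
#include <stddef.h>
#include <stdint.h>

enum conv { CONV_NONE, CONV_REV, CONV_INV };

/* n 32-bit constants in, 4*n bytes out. */
static void emit_bytes(const uint32_t *longs, size_t n,
                       int little_endian, enum conv c, uint8_t *out)
{
    size_t k = 0;
    for (size_t i = 0; i < n; i++) {      /* big-endian byte split */
        out[k++] = longs[i] >> 24;
        out[k++] = longs[i] >> 16;
        out[k++] = longs[i] >>  8;
        out[k++] = longs[i];
    }
    if (!little_endian)
        return;
    if (c == CONV_INV) {                  /* ?inv: xor each byte with 0x0f */
        for (size_t i = 0; i < k; i++)
            out[i] ^= 0x0f;
    } else if (c == CONV_REV) {           /* ?rev: reverse the whole run */
        for (size_t i = 0; i < k / 2; i++) {
            uint8_t t = out[i];
            out[i] = out[k - 1 - i];
            out[k - 1 - i] = t;
        }
    }
}
```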
+diff --git a/crypto/aes/asm/vpaes-x86_64.pl b/crypto/aes/asm/vpaes-x86_64.pl
+index 41f2e46..bd7f45b 100644
+--- a/crypto/aes/asm/vpaes-x86_64.pl
++++ b/crypto/aes/asm/vpaes-x86_64.pl
+@@ -1060,7 +1060,7 @@ _vpaes_consts:
+ .Lk_dsbo:	# decryption sbox final output
+ 	.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+ 	.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+-.asciz	"Vector Permutaion AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
++.asciz	"Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
+ .align	64
+ .size	_vpaes_consts,.-_vpaes_consts
+ ___
+diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl
+index f9b6992..da69c6a 100644
+--- a/crypto/bn/asm/ppc-mont.pl
++++ b/crypto/bn/asm/ppc-mont.pl
+@@ -325,6 +325,7 @@ Lcopy:				; copy or in-place refresh
+ 	.long	0
+ 	.byte	0,12,4,0,0x80,12,6,0
+ 	.long	0
++.size	.bn_mul_mont_int,.-.bn_mul_mont_int
+ 
+ .asciz  "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>"
+ ___
+diff --git a/crypto/bn/asm/ppc.pl b/crypto/bn/asm/ppc.pl
+index 1249ce2..04df1fe 100644
+--- a/crypto/bn/asm/ppc.pl
++++ b/crypto/bn/asm/ppc.pl
+@@ -392,6 +392,7 @@ $data=<<EOF;
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,2,0
+ 	.long	0
++.size	.bn_sqr_comba4,.-.bn_sqr_comba4
+ 
+ #
+ #	NOTE:	The following label name should be changed to
+@@ -819,6 +820,7 @@ $data=<<EOF;
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,2,0
+ 	.long	0
++.size	.bn_sqr_comba8,.-.bn_sqr_comba8
+ 
+ #
+ #	NOTE:	The following label name should be changed to
+@@ -972,6 +974,7 @@ $data=<<EOF;
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,3,0
+ 	.long	0
++.size	.bn_mul_comba4,.-.bn_mul_comba4
+ 
+ #
+ #	NOTE:	The following label name should be changed to
+@@ -1510,6 +1513,7 @@ $data=<<EOF;
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,3,0
+ 	.long	0
++.size	.bn_mul_comba8,.-.bn_mul_comba8
+ 
+ #
+ #	NOTE:	The following label name should be changed to
+@@ -1560,6 +1564,7 @@ Lppcasm_sub_adios:
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,4,0
+ 	.long	0
++.size	.bn_sub_words,.-.bn_sub_words
+ 
+ #
+ #	NOTE:	The following label name should be changed to
+@@ -1605,6 +1610,7 @@ Lppcasm_add_adios:
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,4,0
+ 	.long	0
++.size	.bn_add_words,.-.bn_add_words
+ 
+ #
+ #	NOTE:	The following label name should be changed to
+@@ -1720,6 +1726,7 @@ Lppcasm_div9:
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,3,0
+ 	.long	0
++.size	.bn_div_words,.-.bn_div_words
+ 
+ #
+ #	NOTE:	The following label name should be changed to
+@@ -1761,6 +1768,7 @@ Lppcasm_sqr_adios:
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,3,0
+ 	.long	0
++.size	.bn_sqr_words,.-.bn_sqr_words
+ 
+ #
+ #	NOTE:	The following label name should be changed to
+@@ -1866,6 +1874,7 @@ Lppcasm_mw_OVER:
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,4,0
+ 	.long	0
++.size	bn_mul_words,.-bn_mul_words
+ 
+ #
+ #	NOTE:	The following label name should be changed to
+@@ -1991,6 +2000,7 @@ Lppcasm_maw_adios:
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,4,0
+ 	.long	0
++.size	.bn_mul_add_words,.-.bn_mul_add_words
+ 	.align	4
+ EOF
+ $data =~ s/\`([^\`]*)\`/eval $1/gem;
+diff --git a/crypto/bn/asm/ppc64-mont.pl b/crypto/bn/asm/ppc64-mont.pl
+index a14e769..68e3733 100644
+--- a/crypto/bn/asm/ppc64-mont.pl
++++ b/crypto/bn/asm/ppc64-mont.pl
+@@ -1,7 +1,7 @@
+ #!/usr/bin/env perl
+ 
+ # ====================================================================
+-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+@@ -65,6 +65,14 @@
+ # others alternative would be to break dependence on upper halves of
+ # GPRs by sticking to 32-bit integer operations...
+ 
++# December 2012
++
++# Remove the above-mentioned dependence on GPRs' upper halves in the
++# 32-bit build. No signal-masking overhead, but integer instructions
++# are *more* numerous... It's still "universally" faster than 32-bit
++# ppc-mont.pl, but the improvement coefficient is not as impressive
++# for longer keys...
++
+ $flavour = shift;
+ 
+ if ($flavour =~ /32/) {
+@@ -110,6 +118,9 @@ $tp="r10";
+ $j="r11";
+ $i="r12";
+ # non-volatile registers
++$c1="r19";
++$n1="r20";
++$a1="r21";
+ $nap_d="r22";	# interleaved ap and np in double format
+ $a0="r23";	# ap[0]
+ $t0="r24";	# temporary registers
+@@ -180,8 +191,8 @@ $T3a="f30";	$T3b="f31";
+ #		.				.
+ #		+-------------------------------+
+ #		.				.
+-#   -12*size_t	+-------------------------------+
+-#		| 10 saved gpr, r22-r31		|
++#   -13*size_t	+-------------------------------+
++#		| 13 saved gpr, r19-r31		|
+ #		.				.
+ #		.				.
+ #   -12*8	+-------------------------------+
+@@ -215,6 +226,9 @@ $code=<<___;
+ 	mr	$i,$sp
+ 	$STUX	$sp,$sp,$tp	; alloca
+ 
++	$PUSH	r19,`-12*8-13*$SIZE_T`($i)
++	$PUSH	r20,`-12*8-12*$SIZE_T`($i)
++	$PUSH	r21,`-12*8-11*$SIZE_T`($i)
+ 	$PUSH	r22,`-12*8-10*$SIZE_T`($i)
+ 	$PUSH	r23,`-12*8-9*$SIZE_T`($i)
+ 	$PUSH	r24,`-12*8-8*$SIZE_T`($i)
+@@ -237,40 +251,26 @@ $code=<<___;
+ 	stfd	f29,`-3*8`($i)
+ 	stfd	f30,`-2*8`($i)
+ 	stfd	f31,`-1*8`($i)
+-___
+-$code.=<<___ if ($SIZE_T==8);
+-	ld	$a0,0($ap)	; pull ap[0] value
+-	ld	$n0,0($n0)	; pull n0[0] value
+-	ld	$t3,0($bp)	; bp[0]
+-___
+-$code.=<<___ if ($SIZE_T==4);
+-	mr	$t1,$n0
+-	lwz	$a0,0($ap)	; pull ap[0,1] value
+-	lwz	$t0,4($ap)
+-	lwz	$n0,0($t1)	; pull n0[0,1] value
+-	lwz	$t1,4($t1)
+-	lwz	$t3,0($bp)	; bp[0,1]
+-	lwz	$t2,4($bp)
+-	insrdi	$a0,$t0,32,0
+-	insrdi	$n0,$t1,32,0
+-	insrdi	$t3,$t2,32,0
+-___
+-$code.=<<___;
++
+ 	addi	$tp,$sp,`$FRAME+$TRANSFER+8+64`
+ 	li	$i,-64
+ 	add	$nap_d,$tp,$num
+ 	and	$nap_d,$nap_d,$i	; align to 64 bytes
+-
+-	mulld	$t7,$a0,$t3	; ap[0]*bp[0]
+ 	; nap_d is off by 1, because it's used with stfdu/lfdu
+ 	addi	$nap_d,$nap_d,-8
+ 	srwi	$j,$num,`3+1`	; counter register, num/2
+-	mulld	$t7,$t7,$n0	; tp[0]*n0
+ 	addi	$j,$j,-1
+ 	addi	$tp,$sp,`$FRAME+$TRANSFER-8`
+ 	li	$carry,0
+ 	mtctr	$j
++___
++
++$code.=<<___ if ($SIZE_T==8);
++	ld	$a0,0($ap)		; pull ap[0] value
++	ld	$t3,0($bp)		; bp[0]
++	ld	$n0,0($n0)		; pull n0[0] value
+ 
++	mulld	$t7,$a0,$t3		; ap[0]*bp[0]
+ 	; transfer bp[0] to FPU as 4x16-bit values
+ 	extrdi	$t0,$t3,16,48
+ 	extrdi	$t1,$t3,16,32
+@@ -280,6 +280,8 @@ $code.=<<___;
+ 	std	$t1,`$FRAME+8`($sp)
+ 	std	$t2,`$FRAME+16`($sp)
+ 	std	$t3,`$FRAME+24`($sp)
++
++	mulld	$t7,$t7,$n0		; tp[0]*n0
+ 	; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
+ 	extrdi	$t4,$t7,16,48
+ 	extrdi	$t5,$t7,16,32
+@@ -289,21 +291,61 @@ $code.=<<___;
+ 	std	$t5,`$FRAME+40`($sp)
+ 	std	$t6,`$FRAME+48`($sp)
+ 	std	$t7,`$FRAME+56`($sp)
+-___
+-$code.=<<___ if ($SIZE_T==8);
+-	lwz	$t0,4($ap)		; load a[j] as 32-bit word pair
+-	lwz	$t1,0($ap)
+-	lwz	$t2,12($ap)		; load a[j+1] as 32-bit word pair
++
++	extrdi	$t0,$a0,32,32		; lwz	$t0,4($ap)
++	extrdi	$t1,$a0,32,0		; lwz	$t1,0($ap)
++	lwz	$t2,12($ap)		; load a[1] as 32-bit word pair
+ 	lwz	$t3,8($ap)
+-	lwz	$t4,4($np)		; load n[j] as 32-bit word pair
++	lwz	$t4,4($np)		; load n[0] as 32-bit word pair
+ 	lwz	$t5,0($np)
+-	lwz	$t6,12($np)		; load n[j+1] as 32-bit word pair
++	lwz	$t6,12($np)		; load n[1] as 32-bit word pair
+ 	lwz	$t7,8($np)
+ ___
+ $code.=<<___ if ($SIZE_T==4);
+-	lwz	$t0,0($ap)		; load a[j..j+3] as 32-bit word pairs
+-	lwz	$t1,4($ap)
+-	lwz	$t2,8($ap)
++	lwz	$a0,0($ap)		; pull ap[0,1] value
++	mr	$n1,$n0
++	lwz	$a1,4($ap)
++	li	$c1,0
++	lwz	$t1,0($bp)		; bp[0,1]
++	lwz	$t3,4($bp)
++	lwz	$n0,0($n1)		; pull n0[0,1] value
++	lwz	$n1,4($n1)
++
++	mullw	$t4,$a0,$t1		; mulld ap[0]*bp[0]
++	mulhwu	$t5,$a0,$t1
++	mullw	$t6,$a1,$t1
++	mullw	$t7,$a0,$t3
++	add	$t5,$t5,$t6
++	add	$t5,$t5,$t7
++	; transfer bp[0] to FPU as 4x16-bit values
++	extrwi	$t0,$t1,16,16
++	extrwi	$t1,$t1,16,0
++	extrwi	$t2,$t3,16,16
++	extrwi	$t3,$t3,16,0
++	std	$t0,`$FRAME+0`($sp)	; yes, std in 32-bit build
++	std	$t1,`$FRAME+8`($sp)
++	std	$t2,`$FRAME+16`($sp)
++	std	$t3,`$FRAME+24`($sp)
++
++	mullw	$t0,$t4,$n0		; mulld tp[0]*n0
++	mulhwu	$t1,$t4,$n0
++	mullw	$t2,$t5,$n0
++	mullw	$t3,$t4,$n1
++	add	$t1,$t1,$t2
++	add	$t1,$t1,$t3
++	; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
++	extrwi	$t4,$t0,16,16
++	extrwi	$t5,$t0,16,0
++	extrwi	$t6,$t1,16,16
++	extrwi	$t7,$t1,16,0
++	std	$t4,`$FRAME+32`($sp)	; yes, std in 32-bit build
++	std	$t5,`$FRAME+40`($sp)
++	std	$t6,`$FRAME+48`($sp)
++	std	$t7,`$FRAME+56`($sp)
++
++	mr	$t0,$a0			; lwz	$t0,0($ap)
++	mr	$t1,$a1			; lwz	$t1,4($ap)
++	lwz	$t2,8($ap)		; load a[j..j+3] as 32-bit word pairs
+ 	lwz	$t3,12($ap)
+ 	lwz	$t4,0($np)		; load n[j..j+3] as 32-bit word pairs
+ 	lwz	$t5,4($np)
+@@ -319,7 +361,7 @@ $code.=<<___;
+ 	lfd	$nb,`$FRAME+40`($sp)
+ 	lfd	$nc,`$FRAME+48`($sp)
+ 	lfd	$nd,`$FRAME+56`($sp)
+-	std	$t0,`$FRAME+64`($sp)
++	std	$t0,`$FRAME+64`($sp)	; yes, std even in 32-bit build
+ 	std	$t1,`$FRAME+72`($sp)
+ 	std	$t2,`$FRAME+80`($sp)
+ 	std	$t3,`$FRAME+88`($sp)
+@@ -441,7 +483,7 @@ $code.=<<___ if ($SIZE_T==4);
+ 	lwz	$t7,12($np)
+ ___
+ $code.=<<___;
+-	std	$t0,`$FRAME+64`($sp)
++	std	$t0,`$FRAME+64`($sp)	; yes, std even in 32-bit build
+ 	std	$t1,`$FRAME+72`($sp)
+ 	std	$t2,`$FRAME+80`($sp)
+ 	std	$t3,`$FRAME+88`($sp)
+@@ -449,6 +491,9 @@ $code.=<<___;
+ 	std	$t5,`$FRAME+104`($sp)
+ 	std	$t6,`$FRAME+112`($sp)
+ 	std	$t7,`$FRAME+120`($sp)
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ 	ld	$t0,`$FRAME+0`($sp)
+ 	ld	$t1,`$FRAME+8`($sp)
+ 	ld	$t2,`$FRAME+16`($sp)
+@@ -457,6 +502,20 @@ $code.=<<___;
+ 	ld	$t5,`$FRAME+40`($sp)
+ 	ld	$t6,`$FRAME+48`($sp)
+ 	ld	$t7,`$FRAME+56`($sp)
++___
++} else {
++$code.=<<___;
++	lwz	$t1,`$FRAME+0`($sp)
++	lwz	$t0,`$FRAME+4`($sp)
++	lwz	$t3,`$FRAME+8`($sp)
++	lwz	$t2,`$FRAME+12`($sp)
++	lwz	$t5,`$FRAME+16`($sp)
++	lwz	$t4,`$FRAME+20`($sp)
++	lwz	$t7,`$FRAME+24`($sp)
++	lwz	$t6,`$FRAME+28`($sp)
++___
++}
++$code.=<<___;
+ 	lfd	$A0,`$FRAME+64`($sp)
+ 	lfd	$A1,`$FRAME+72`($sp)
+ 	lfd	$A2,`$FRAME+80`($sp)
+@@ -488,7 +547,9 @@ $code.=<<___;
+ 	fmadd	$T0b,$A0,$bb,$dotb
+ 	stfd	$A2,24($nap_d)		; save a[j+1] in double format
+ 	stfd	$A3,32($nap_d)
+-
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ 	fmadd	$T1a,$A0,$bc,$T1a
+ 	fmadd	$T1b,$A0,$bd,$T1b
+ 	fmadd	$T2a,$A1,$bc,$T2a
+@@ -561,11 +622,123 @@ $code.=<<___;
+ 	stfd	$T3b,`$FRAME+56`($sp)
+ 	 std	$t0,8($tp)		; tp[j-1]
+ 	 stdu	$t4,16($tp)		; tp[j]
++___
++} else {
++$code.=<<___;
++	fmadd	$T1a,$A0,$bc,$T1a
++	fmadd	$T1b,$A0,$bd,$T1b
++	 addc	$t0,$t0,$carry
++	 adde	$t1,$t1,$c1
++	 srwi	$carry,$t0,16
++	fmadd	$T2a,$A1,$bc,$T2a
++	fmadd	$T2b,$A1,$bd,$T2b
++	stfd	$N0,40($nap_d)		; save n[j] in double format
++	stfd	$N1,48($nap_d)
++	 srwi	$c1,$t1,16
++	 insrwi	$carry,$t1,16,0
++	fmadd	$T3a,$A2,$bc,$T3a
++	fmadd	$T3b,$A2,$bd,$T3b
++	 addc	$t2,$t2,$carry
++	 adde	$t3,$t3,$c1
++	 srwi	$carry,$t2,16
++	fmul	$dota,$A3,$bc
++	fmul	$dotb,$A3,$bd
++	stfd	$N2,56($nap_d)		; save n[j+1] in double format
++	stfdu	$N3,64($nap_d)
++	 insrwi	$t0,$t2,16,0		; 0..31 bits
++	 srwi	$c1,$t3,16
++	 insrwi	$carry,$t3,16,0
++
++	fmadd	$T1a,$N1,$na,$T1a
++	fmadd	$T1b,$N1,$nb,$T1b
++	 lwz	$t3,`$FRAME+32`($sp)	; permuted $t1
++	 lwz	$t2,`$FRAME+36`($sp)	; permuted $t0
++	 addc	$t4,$t4,$carry
++	 adde	$t5,$t5,$c1
++	 srwi	$carry,$t4,16
++	fmadd	$T2a,$N2,$na,$T2a
++	fmadd	$T2b,$N2,$nb,$T2b
++	 srwi	$c1,$t5,16
++	 insrwi	$carry,$t5,16,0
++	fmadd	$T3a,$N3,$na,$T3a
++	fmadd	$T3b,$N3,$nb,$T3b
++	 addc	$t6,$t6,$carry
++	 adde	$t7,$t7,$c1
++	 srwi	$carry,$t6,16
++	fmadd	$T0a,$N0,$na,$T0a
++	fmadd	$T0b,$N0,$nb,$T0b
++	 insrwi	$t4,$t6,16,0		; 32..63 bits
++	 srwi	$c1,$t7,16
++	 insrwi	$carry,$t7,16,0
++
++	fmadd	$T1a,$N0,$nc,$T1a
++	fmadd	$T1b,$N0,$nd,$T1b
++	 lwz	$t7,`$FRAME+40`($sp)	; permuted $t3
++	 lwz	$t6,`$FRAME+44`($sp)	; permuted $t2
++	 addc	$t2,$t2,$carry
++	 adde	$t3,$t3,$c1
++	 srwi	$carry,$t2,16
++	fmadd	$T2a,$N1,$nc,$T2a
++	fmadd	$T2b,$N1,$nd,$T2b
++	 stw	$t0,12($tp)		; tp[j-1]
++	 stw	$t4,8($tp)
++	 srwi	$c1,$t3,16
++	 insrwi	$carry,$t3,16,0
++	fmadd	$T3a,$N2,$nc,$T3a
++	fmadd	$T3b,$N2,$nd,$T3b
++	 lwz	$t1,`$FRAME+48`($sp)	; permuted $t5
++	 lwz	$t0,`$FRAME+52`($sp)	; permuted $t4
++	 addc	$t6,$t6,$carry
++	 adde	$t7,$t7,$c1
++	 srwi	$carry,$t6,16
++	fmadd	$dota,$N3,$nc,$dota
++	fmadd	$dotb,$N3,$nd,$dotb
++	 insrwi	$t2,$t6,16,0		; 64..95 bits
++	 srwi	$c1,$t7,16
++	 insrwi	$carry,$t7,16,0
++
++	fctid	$T0a,$T0a
++	fctid	$T0b,$T0b
++	 lwz	$t5,`$FRAME+56`($sp)	; permuted $t7
++	 lwz	$t4,`$FRAME+60`($sp)	; permuted $t6
++	 addc	$t0,$t0,$carry
++	 adde	$t1,$t1,$c1
++	 srwi	$carry,$t0,16
++	fctid	$T1a,$T1a
++	fctid	$T1b,$T1b
++	 srwi	$c1,$t1,16
++	 insrwi	$carry,$t1,16,0
++	fctid	$T2a,$T2a
++	fctid	$T2b,$T2b
++	 addc	$t4,$t4,$carry
++	 adde	$t5,$t5,$c1
++	 srwi	$carry,$t4,16
++	fctid	$T3a,$T3a
++	fctid	$T3b,$T3b
++	 insrwi	$t0,$t4,16,0		; 96..127 bits
++	 srwi	$c1,$t5,16
++	 insrwi	$carry,$t5,16,0
++
++	stfd	$T0a,`$FRAME+0`($sp)
++	stfd	$T0b,`$FRAME+8`($sp)
++	stfd	$T1a,`$FRAME+16`($sp)
++	stfd	$T1b,`$FRAME+24`($sp)
++	stfd	$T2a,`$FRAME+32`($sp)
++	stfd	$T2b,`$FRAME+40`($sp)
++	stfd	$T3a,`$FRAME+48`($sp)
++	stfd	$T3b,`$FRAME+56`($sp)
++	 stw	$t2,20($tp)		; tp[j]
++	 stwu	$t0,16($tp)
++___
++}
++$code.=<<___;
+ 	bdnz-	L1st
+ 
+ 	fctid	$dota,$dota
+ 	fctid	$dotb,$dotb
+-
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ 	ld	$t0,`$FRAME+0`($sp)
+ 	ld	$t1,`$FRAME+8`($sp)
+ 	ld	$t2,`$FRAME+16`($sp)
+@@ -611,33 +784,117 @@ $code.=<<___;
+ 	insrdi	$t6,$t7,48,0
+ 	srdi	$ovf,$t7,48
+ 	std	$t6,8($tp)		; tp[num-1]
++___
++} else {
++$code.=<<___;
++	lwz	$t1,`$FRAME+0`($sp)
++	lwz	$t0,`$FRAME+4`($sp)
++	lwz	$t3,`$FRAME+8`($sp)
++	lwz	$t2,`$FRAME+12`($sp)
++	lwz	$t5,`$FRAME+16`($sp)
++	lwz	$t4,`$FRAME+20`($sp)
++	lwz	$t7,`$FRAME+24`($sp)
++	lwz	$t6,`$FRAME+28`($sp)
++	stfd	$dota,`$FRAME+64`($sp)
++	stfd	$dotb,`$FRAME+72`($sp)
+ 
++	addc	$t0,$t0,$carry
++	adde	$t1,$t1,$c1
++	srwi	$carry,$t0,16
++	insrwi	$carry,$t1,16,0
++	srwi	$c1,$t1,16
++	addc	$t2,$t2,$carry
++	adde	$t3,$t3,$c1
++	srwi	$carry,$t2,16
++	 insrwi	$t0,$t2,16,0		; 0..31 bits
++	insrwi	$carry,$t3,16,0
++	srwi	$c1,$t3,16
++	addc	$t4,$t4,$carry
++	adde	$t5,$t5,$c1
++	srwi	$carry,$t4,16
++	insrwi	$carry,$t5,16,0
++	srwi	$c1,$t5,16
++	addc	$t6,$t6,$carry
++	adde	$t7,$t7,$c1
++	srwi	$carry,$t6,16
++	 insrwi	$t4,$t6,16,0		; 32..63 bits
++	insrwi	$carry,$t7,16,0
++	srwi	$c1,$t7,16
++	 stw	$t0,12($tp)		; tp[j-1]
++	 stw	$t4,8($tp)
++
++	lwz	$t3,`$FRAME+32`($sp)	; permuted $t1
++	lwz	$t2,`$FRAME+36`($sp)	; permuted $t0
++	lwz	$t7,`$FRAME+40`($sp)	; permuted $t3
++	lwz	$t6,`$FRAME+44`($sp)	; permuted $t2
++	lwz	$t1,`$FRAME+48`($sp)	; permuted $t5
++	lwz	$t0,`$FRAME+52`($sp)	; permuted $t4
++	lwz	$t5,`$FRAME+56`($sp)	; permuted $t7
++	lwz	$t4,`$FRAME+60`($sp)	; permuted $t6
++
++	addc	$t2,$t2,$carry
++	adde	$t3,$t3,$c1
++	srwi	$carry,$t2,16
++	insrwi	$carry,$t3,16,0
++	srwi	$c1,$t3,16
++	addc	$t6,$t6,$carry
++	adde	$t7,$t7,$c1
++	srwi	$carry,$t6,16
++	 insrwi	$t2,$t6,16,0		; 64..95 bits
++	insrwi	$carry,$t7,16,0
++	srwi	$c1,$t7,16
++	addc	$t0,$t0,$carry
++	adde	$t1,$t1,$c1
++	srwi	$carry,$t0,16
++	insrwi	$carry,$t1,16,0
++	srwi	$c1,$t1,16
++	addc	$t4,$t4,$carry
++	adde	$t5,$t5,$c1
++	srwi	$carry,$t4,16
++	 insrwi	$t0,$t4,16,0		; 96..127 bits
++	insrwi	$carry,$t5,16,0
++	srwi	$c1,$t5,16
++	 stw	$t2,20($tp)		; tp[j]
++	 stwu	$t0,16($tp)
++
++	lwz	$t7,`$FRAME+64`($sp)
++	lwz	$t6,`$FRAME+68`($sp)
++	lwz	$t5,`$FRAME+72`($sp)
++	lwz	$t4,`$FRAME+76`($sp)
++
++	addc	$t6,$t6,$carry
++	adde	$t7,$t7,$c1
++	srwi	$carry,$t6,16
++	insrwi	$carry,$t7,16,0
++	srwi	$c1,$t7,16
++	addc	$t4,$t4,$carry
++	adde	$t5,$t5,$c1
++
++	insrwi	$t6,$t4,16,0
++	srwi	$t4,$t4,16
++	insrwi	$t4,$t5,16,0
++	srwi	$ovf,$t5,16
++	stw	$t6,12($tp)		; tp[num-1]
++	stw	$t4,8($tp)
++___
++}
++$code.=<<___;
+ 	slwi	$t7,$num,2
+ 	subf	$nap_d,$t7,$nap_d	; rewind pointer
+ 
+ 	li	$i,8			; i=1
+ .align	5
+ Louter:
+-___
+-$code.=<<___ if ($SIZE_T==8);
+-	ldx	$t3,$bp,$i	; bp[i]
+-___
+-$code.=<<___ if ($SIZE_T==4);
+-	add	$t0,$bp,$i
+-	lwz	$t3,0($t0)		; bp[i,i+1]
+-	lwz	$t0,4($t0)
+-	insrdi	$t3,$t0,32,0
+-___
+-$code.=<<___;
+-	ld	$t6,`$FRAME+$TRANSFER+8`($sp)	; tp[0]
+-	mulld	$t7,$a0,$t3	; ap[0]*bp[i]
+-
+ 	addi	$tp,$sp,`$FRAME+$TRANSFER`
+-	add	$t7,$t7,$t6	; ap[0]*bp[i]+tp[0]
+ 	li	$carry,0
+-	mulld	$t7,$t7,$n0	; tp[0]*n0
+ 	mtctr	$j
++___
++$code.=<<___ if ($SIZE_T==8);
++	ldx	$t3,$bp,$i		; bp[i]
+ 
++	ld	$t6,`$FRAME+$TRANSFER+8`($sp)	; tp[0]
++	mulld	$t7,$a0,$t3		; ap[0]*bp[i]
++	add	$t7,$t7,$t6		; ap[0]*bp[i]+tp[0]
+ 	; transfer bp[i] to FPU as 4x16-bit values
+ 	extrdi	$t0,$t3,16,48
+ 	extrdi	$t1,$t3,16,32
+@@ -647,6 +904,8 @@ $code.=<<___;
+ 	std	$t1,`$FRAME+8`($sp)
+ 	std	$t2,`$FRAME+16`($sp)
+ 	std	$t3,`$FRAME+24`($sp)
++
++	mulld	$t7,$t7,$n0		; tp[0]*n0
+ 	; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
+ 	extrdi	$t4,$t7,16,48
+ 	extrdi	$t5,$t7,16,32
+@@ -656,7 +915,50 @@ $code.=<<___;
+ 	std	$t5,`$FRAME+40`($sp)
+ 	std	$t6,`$FRAME+48`($sp)
+ 	std	$t7,`$FRAME+56`($sp)
++___
++$code.=<<___ if ($SIZE_T==4);
++	add	$t0,$bp,$i
++	li	$c1,0
++	lwz	$t1,0($t0)		; bp[i,i+1]
++	lwz	$t3,4($t0)
++
++	mullw	$t4,$a0,$t1		; ap[0]*bp[i]
++	lwz	$t0,`$FRAME+$TRANSFER+8+4`($sp)	; tp[0]
++	mulhwu	$t5,$a0,$t1
++	lwz	$t2,`$FRAME+$TRANSFER+8`($sp)	; tp[0]
++	mullw	$t6,$a1,$t1
++	mullw	$t7,$a0,$t3
++	add	$t5,$t5,$t6
++	add	$t5,$t5,$t7
++	addc	$t4,$t4,$t0		; ap[0]*bp[i]+tp[0]
++	adde	$t5,$t5,$t2
++	; transfer bp[i] to FPU as 4x16-bit values
++	extrwi	$t0,$t1,16,16
++	extrwi	$t1,$t1,16,0
++	extrwi	$t2,$t3,16,16
++	extrwi	$t3,$t3,16,0
++	std	$t0,`$FRAME+0`($sp)	; yes, std in 32-bit build
++	std	$t1,`$FRAME+8`($sp)
++	std	$t2,`$FRAME+16`($sp)
++	std	$t3,`$FRAME+24`($sp)
+ 
++	mullw	$t0,$t4,$n0		; mulld tp[0]*n0
++	mulhwu	$t1,$t4,$n0
++	mullw	$t2,$t5,$n0
++	mullw	$t3,$t4,$n1
++	add	$t1,$t1,$t2
++	add	$t1,$t1,$t3
++	; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
++	extrwi	$t4,$t0,16,16
++	extrwi	$t5,$t0,16,0
++	extrwi	$t6,$t1,16,16
++	extrwi	$t7,$t1,16,0
++	std	$t4,`$FRAME+32`($sp)	; yes, std in 32-bit build
++	std	$t5,`$FRAME+40`($sp)
++	std	$t6,`$FRAME+48`($sp)
++	std	$t7,`$FRAME+56`($sp)
++___
++$code.=<<___;
+ 	lfd	$A0,8($nap_d)		; load a[j] in double format
+ 	lfd	$A1,16($nap_d)
+ 	lfd	$A2,24($nap_d)		; load a[j+1] in double format
+@@ -769,7 +1071,9 @@ Linner:
+ 	fmul	$dotb,$A3,$bd
+ 	 lfd	$A2,24($nap_d)		; load a[j+1] in double format
+ 	 lfd	$A3,32($nap_d)
+-
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ 	fmadd	$T1a,$N1,$na,$T1a
+ 	fmadd	$T1b,$N1,$nb,$T1b
+ 	 ld	$t0,`$FRAME+0`($sp)
+@@ -856,10 +1160,131 @@ $code.=<<___;
+ 	 addze	$carry,$carry
+ 	 std	$t3,-16($tp)		; tp[j-1]
+ 	 std	$t5,-8($tp)		; tp[j]
++___
++} else {
++$code.=<<___;
++	fmadd	$T1a,$N1,$na,$T1a
++	fmadd	$T1b,$N1,$nb,$T1b
++	 lwz	$t1,`$FRAME+0`($sp)
++	 lwz	$t0,`$FRAME+4`($sp)
++	fmadd	$T2a,$N2,$na,$T2a
++	fmadd	$T2b,$N2,$nb,$T2b
++	 lwz	$t3,`$FRAME+8`($sp)
++	 lwz	$t2,`$FRAME+12`($sp)
++	fmadd	$T3a,$N3,$na,$T3a
++	fmadd	$T3b,$N3,$nb,$T3b
++	 lwz	$t5,`$FRAME+16`($sp)
++	 lwz	$t4,`$FRAME+20`($sp)
++	 addc	$t0,$t0,$carry
++	 adde	$t1,$t1,$c1
++	 srwi	$carry,$t0,16
++	fmadd	$T0a,$N0,$na,$T0a
++	fmadd	$T0b,$N0,$nb,$T0b
++	 lwz	$t7,`$FRAME+24`($sp)
++	 lwz	$t6,`$FRAME+28`($sp)
++	 srwi	$c1,$t1,16
++	 insrwi	$carry,$t1,16,0
++
++	fmadd	$T1a,$N0,$nc,$T1a
++	fmadd	$T1b,$N0,$nd,$T1b
++	 addc	$t2,$t2,$carry
++	 adde	$t3,$t3,$c1
++	 srwi	$carry,$t2,16
++	fmadd	$T2a,$N1,$nc,$T2a
++	fmadd	$T2b,$N1,$nd,$T2b
++	 insrwi	$t0,$t2,16,0		; 0..31 bits
++	 srwi	$c1,$t3,16
++	 insrwi	$carry,$t3,16,0
++	fmadd	$T3a,$N2,$nc,$T3a
++	fmadd	$T3b,$N2,$nd,$T3b
++	 lwz	$t2,12($tp)		; tp[j]
++	 lwz	$t3,8($tp)
++	 addc	$t4,$t4,$carry
++	 adde	$t5,$t5,$c1
++	 srwi	$carry,$t4,16
++	fmadd	$dota,$N3,$nc,$dota
++	fmadd	$dotb,$N3,$nd,$dotb
++	 srwi	$c1,$t5,16
++	 insrwi	$carry,$t5,16,0
++
++	fctid	$T0a,$T0a
++	 addc	$t6,$t6,$carry
++	 adde	$t7,$t7,$c1
++	 srwi	$carry,$t6,16
++	fctid	$T0b,$T0b
++	 insrwi	$t4,$t6,16,0		; 32..63 bits
++	 srwi	$c1,$t7,16
++	 insrwi	$carry,$t7,16,0
++	fctid	$T1a,$T1a
++	 addc	$t0,$t0,$t2
++	 adde	$t4,$t4,$t3
++	 lwz	$t3,`$FRAME+32`($sp)	; permuted $t1
++	 lwz	$t2,`$FRAME+36`($sp)	; permuted $t0
++	fctid	$T1b,$T1b
++	 addze	$carry,$carry
++	 addze	$c1,$c1
++	 stw	$t0,4($tp)		; tp[j-1]
++	 stw	$t4,0($tp)
++	fctid	$T2a,$T2a
++	 addc	$t2,$t2,$carry
++	 adde	$t3,$t3,$c1
++	 srwi	$carry,$t2,16
++	 lwz	$t7,`$FRAME+40`($sp)	; permuted $t3
++	 lwz	$t6,`$FRAME+44`($sp)	; permuted $t2
++	fctid	$T2b,$T2b
++	 srwi	$c1,$t3,16
++	 insrwi	$carry,$t3,16,0
++	 lwz	$t1,`$FRAME+48`($sp)	; permuted $t5
++	 lwz	$t0,`$FRAME+52`($sp)	; permuted $t4
++	fctid	$T3a,$T3a
++	 addc	$t6,$t6,$carry
++	 adde	$t7,$t7,$c1
++	 srwi	$carry,$t6,16
++	 lwz	$t5,`$FRAME+56`($sp)	; permuted $t7
++	 lwz	$t4,`$FRAME+60`($sp)	; permuted $t6
++	fctid	$T3b,$T3b
++
++	 insrwi	$t2,$t6,16,0		; 64..95 bits
++	insrwi	$carry,$t7,16,0
++	srwi	$c1,$t7,16
++	 lwz	$t6,20($tp)
++	 lwzu	$t7,16($tp)
++	addc	$t0,$t0,$carry
++	 stfd	$T0a,`$FRAME+0`($sp)
++	adde	$t1,$t1,$c1
++	srwi	$carry,$t0,16
++	 stfd	$T0b,`$FRAME+8`($sp)
++	insrwi	$carry,$t1,16,0
++	srwi	$c1,$t1,16
++	addc	$t4,$t4,$carry
++	 stfd	$T1a,`$FRAME+16`($sp)
++	adde	$t5,$t5,$c1
++	srwi	$carry,$t4,16
++	 insrwi	$t0,$t4,16,0		; 96..127 bits
++	 stfd	$T1b,`$FRAME+24`($sp)
++	insrwi	$carry,$t5,16,0
++	srwi	$c1,$t5,16
++
++	addc	$t2,$t2,$t6
++	 stfd	$T2a,`$FRAME+32`($sp)
++	adde	$t0,$t0,$t7
++	 stfd	$T2b,`$FRAME+40`($sp)
++	addze	$carry,$carry
++	 stfd	$T3a,`$FRAME+48`($sp)
++	addze	$c1,$c1
++	 stfd	$T3b,`$FRAME+56`($sp)
++	 stw	$t2,-4($tp)		; tp[j]
++	 stw	$t0,-8($tp)
++___
++}
++$code.=<<___;
+ 	bdnz-	Linner
+ 
+ 	fctid	$dota,$dota
+ 	fctid	$dotb,$dotb
++___
++if ($SIZE_T==8 or $flavour =~ /osx/) {
++$code.=<<___;
+ 	ld	$t0,`$FRAME+0`($sp)
+ 	ld	$t1,`$FRAME+8`($sp)
+ 	ld	$t2,`$FRAME+16`($sp)
+@@ -926,7 +1351,116 @@ $code.=<<___;
+ 	insrdi	$t6,$t7,48,0
+ 	srdi	$ovf,$t7,48
+ 	std	$t6,0($tp)		; tp[num-1]
++___
++} else {
++$code.=<<___;
++	lwz	$t1,`$FRAME+0`($sp)
++	lwz	$t0,`$FRAME+4`($sp)
++	lwz	$t3,`$FRAME+8`($sp)
++	lwz	$t2,`$FRAME+12`($sp)
++	lwz	$t5,`$FRAME+16`($sp)
++	lwz	$t4,`$FRAME+20`($sp)
++	lwz	$t7,`$FRAME+24`($sp)
++	lwz	$t6,`$FRAME+28`($sp)
++	stfd	$dota,`$FRAME+64`($sp)
++	stfd	$dotb,`$FRAME+72`($sp)
+ 
++	addc	$t0,$t0,$carry
++	adde	$t1,$t1,$c1
++	srwi	$carry,$t0,16
++	insrwi	$carry,$t1,16,0
++	srwi	$c1,$t1,16
++	addc	$t2,$t2,$carry
++	adde	$t3,$t3,$c1
++	srwi	$carry,$t2,16
++	 insrwi	$t0,$t2,16,0		; 0..31 bits
++	 lwz	$t2,12($tp)		; tp[j]
++	insrwi	$carry,$t3,16,0
++	srwi	$c1,$t3,16
++	 lwz	$t3,8($tp)
++	addc	$t4,$t4,$carry
++	adde	$t5,$t5,$c1
++	srwi	$carry,$t4,16
++	insrwi	$carry,$t5,16,0
++	srwi	$c1,$t5,16
++	addc	$t6,$t6,$carry
++	adde	$t7,$t7,$c1
++	srwi	$carry,$t6,16
++	 insrwi	$t4,$t6,16,0		; 32..63 bits
++	insrwi	$carry,$t7,16,0
++	srwi	$c1,$t7,16
++
++	addc	$t0,$t0,$t2
++	adde	$t4,$t4,$t3
++	addze	$carry,$carry
++	addze	$c1,$c1
++	 stw	$t0,4($tp)		; tp[j-1]
++	 stw	$t4,0($tp)
++
++	lwz	$t3,`$FRAME+32`($sp)	; permuted $t1
++	lwz	$t2,`$FRAME+36`($sp)	; permuted $t0
++	lwz	$t7,`$FRAME+40`($sp)	; permuted $t3
++	lwz	$t6,`$FRAME+44`($sp)	; permuted $t2
++	lwz	$t1,`$FRAME+48`($sp)	; permuted $t5
++	lwz	$t0,`$FRAME+52`($sp)	; permuted $t4
++	lwz	$t5,`$FRAME+56`($sp)	; permuted $t7
++	lwz	$t4,`$FRAME+60`($sp)	; permuted $t6
++
++	addc	$t2,$t2,$carry
++	adde	$t3,$t3,$c1
++	srwi	$carry,$t2,16
++	insrwi	$carry,$t3,16,0
++	srwi	$c1,$t3,16
++	addc	$t6,$t6,$carry
++	adde	$t7,$t7,$c1
++	srwi	$carry,$t6,16
++	 insrwi	$t2,$t6,16,0		; 64..95 bits
++	 lwz	$t6,20($tp)
++	insrwi	$carry,$t7,16,0
++	srwi	$c1,$t7,16
++	 lwzu	$t7,16($tp)
++	addc	$t0,$t0,$carry
++	adde	$t1,$t1,$c1
++	srwi	$carry,$t0,16
++	insrwi	$carry,$t1,16,0
++	srwi	$c1,$t1,16
++	addc	$t4,$t4,$carry
++	adde	$t5,$t5,$c1
++	srwi	$carry,$t4,16
++	 insrwi	$t0,$t4,16,0		; 96..127 bits
++	insrwi	$carry,$t5,16,0
++	srwi	$c1,$t5,16
++
++	addc	$t2,$t2,$t6
++	adde	$t0,$t0,$t7
++	 lwz	$t7,`$FRAME+64`($sp)
++	 lwz	$t6,`$FRAME+68`($sp)
++	addze	$carry,$carry
++	addze	$c1,$c1
++	 lwz	$t5,`$FRAME+72`($sp)
++	 lwz	$t4,`$FRAME+76`($sp)
++
++	addc	$t6,$t6,$carry
++	adde	$t7,$t7,$c1
++	 stw	$t2,-4($tp)		; tp[j]
++	 stw	$t0,-8($tp)
++	addc	$t6,$t6,$ovf
++	addze	$t7,$t7
++	srwi	$carry,$t6,16
++	insrwi	$carry,$t7,16,0
++	srwi	$c1,$t7,16
++	addc	$t4,$t4,$carry
++	adde	$t5,$t5,$c1
++
++	insrwi	$t6,$t4,16,0
++	srwi	$t4,$t4,16
++	insrwi	$t4,$t5,16,0
++	srwi	$ovf,$t5,16
++	stw	$t6,4($tp)		; tp[num-1]
++	stw	$t4,0($tp)
++___
++}
++$code.=<<___;
+ 	slwi	$t7,$num,2
+ 	addi	$i,$i,8
+ 	subf	$nap_d,$t7,$nap_d	; rewind pointer
+@@ -994,14 +1528,14 @@ $code.=<<___ if ($SIZE_T==4);
+ 	mtctr	$j
+ 
+ .align	4
+-Lsub:	ld	$t0,8($tp)	; load tp[j..j+3] in 64-bit word order
+-	ldu	$t2,16($tp)
++Lsub:	lwz	$t0,12($tp)	; load tp[j..j+3] in 64-bit word order
++	lwz	$t1,8($tp)
++	lwz	$t2,20($tp)
++	lwzu	$t3,16($tp)
+ 	lwz	$t4,4($np)	; load np[j..j+3] in 32-bit word order
+ 	lwz	$t5,8($np)
+ 	lwz	$t6,12($np)
+ 	lwzu	$t7,16($np)
+-	extrdi	$t1,$t0,32,0
+-	extrdi	$t3,$t2,32,0
+ 	subfe	$t4,$t4,$t0	; tp[j]-np[j]
+ 	 stw	$t0,4($ap)	; save tp[j..j+3] in 32-bit word order
+ 	subfe	$t5,$t5,$t1	; tp[j+1]-np[j+1]
+@@ -1052,6 +1586,9 @@ ___
+ $code.=<<___;
+ 	$POP	$i,0($sp)
+ 	li	r3,1	; signal "handled"
++	$POP	r19,`-12*8-13*$SIZE_T`($i)
++	$POP	r20,`-12*8-12*$SIZE_T`($i)
++	$POP	r21,`-12*8-11*$SIZE_T`($i)
+ 	$POP	r22,`-12*8-10*$SIZE_T`($i)
+ 	$POP	r23,`-12*8-9*$SIZE_T`($i)
+ 	$POP	r24,`-12*8-8*$SIZE_T`($i)
+@@ -1077,8 +1614,9 @@ $code.=<<___;
+ 	mr	$sp,$i
+ 	blr
+ 	.long	0
+-	.byte	0,12,4,0,0x8c,10,6,0
++	.byte	0,12,4,0,0x8c,13,6,0
+ 	.long	0
++.size	.$fname,.-.$fname
+ 
+ .asciz  "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@openssl.org>"
+ ___
+diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
+index 1bfb5d9..51137fd 100644
+--- a/crypto/evp/e_aes.c
++++ b/crypto/evp/e_aes.c
+@@ -153,6 +153,20 @@ void AES_xts_decrypt(const char *inp,char *out,size_t len,
+ 			const unsigned char iv[16]);
+ #endif
+ 
++#if	defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
++# include "ppc_arch.h"
++# ifdef VPAES_ASM
++#  define VPAES_CAPABLE	(OPENSSL_ppccap_P & PPC_ALTIVEC)
++# endif
++# define HWAES_CAPABLE	(OPENSSL_ppccap_P & PPC_CRYPTO207)
++# define HWAES_set_encrypt_key aes_p8_set_encrypt_key
++# define HWAES_set_decrypt_key aes_p8_set_decrypt_key
++# define HWAES_encrypt aes_p8_encrypt
++# define HWAES_decrypt aes_p8_decrypt
++# define HWAES_cbc_encrypt aes_p8_cbc_encrypt
++# define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
++#endif
++
+ #if	defined(AES_ASM) && !defined(I386_ONLY) &&	(  \
+ 	((defined(__i386)	|| defined(__i386__)	|| \
+ 	  defined(_M_IX86)) && defined(OPENSSL_IA32_SSE2))|| \
+diff --git a/crypto/modes/Makefile b/crypto/modes/Makefile
+index c825b12..e684e02 100644
+--- a/crypto/modes/Makefile
++++ b/crypto/modes/Makefile
+@@ -56,6 +56,10 @@ ghash-alpha.s:	asm/ghash-alpha.pl
+ 	$(PERL) $< | $(CC) -E - | tee $@ > /dev/null
+ ghash-parisc.s:	asm/ghash-parisc.pl
+ 	$(PERL) asm/ghash-parisc.pl $(PERLASM_SCHEME) $@
++ghashv8-armx.S:	asm/ghashv8-armx.pl
++	$(PERL) asm/ghashv8-armx.pl $(PERLASM_SCHEME) $@
++ghashp8-ppc.s:	asm/ghashp8-ppc.pl
++	$(PERL) asm/ghashp8-ppc.pl $(PERLASM_SCHEME) $@
+ 
+ # GNU make "catch all"
+ ghash-%.S:	asm/ghash-%.pl;	$(PERL) $< $(PERLASM_SCHEME) $@
+diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl
+new file mode 100755
+index 0000000..e76a58c
+--- /dev/null
++++ b/crypto/modes/asm/ghashp8-ppc.pl
+@@ -0,0 +1,234 @@
++#!/usr/bin/env perl
++#
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++#
++# GHASH for PowerISA v2.07.
++#
++# July 2014
++#
++# Accurate performance measurements are problematic, because it's
++# always a virtualized setup with a possibly throttled processor.
++# Relative comparison is therefore more informative. This initial
++# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
++# faster than "4-bit" integer-only compiler-generated 64-bit code.
++# "Initial version" means that there is room for further improvement.
++
++$flavour=shift;
++$output =shift;
++
++if ($flavour =~ /64/) {
++	$SIZE_T=8;
++	$LRSAVE=2*$SIZE_T;
++	$STU="stdu";
++	$POP="ld";
++	$PUSH="std";
++} elsif ($flavour =~ /32/) {
++	$SIZE_T=4;
++	$LRSAVE=$SIZE_T;
++	$STU="stwu";
++	$POP="lwz";
++	$PUSH="stw";
++} else { die "nonsense $flavour"; }
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
++
++my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6));	# argument block
++
++my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
++my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
++my $vrsave="r12";
++
++$code=<<___;
++.machine	"any"
++
++.text
++
++.globl	.gcm_init_p8
++.align	5
++.gcm_init_p8:
++	lis		r0,0xfff0
++	li		r8,0x10
++	mfspr		$vrsave,256
++	li		r9,0x20
++	mtspr		256,r0
++	li		r10,0x30
++	lvx_u		$H,0,r4			# load H
++
++	vspltisb	$xC2,-16		# 0xf0
++	vspltisb	$t0,1			# one
++	vaddubm		$xC2,$xC2,$xC2		# 0xe0
++	vxor		$zero,$zero,$zero
++	vor		$xC2,$xC2,$t0		# 0xe1
++	vsldoi		$xC2,$xC2,$zero,15	# 0xe1...
++	vsldoi		$t1,$zero,$t0,1		# ...1
++	vaddubm		$xC2,$xC2,$xC2		# 0xc2...
++	vspltisb	$t2,7
++	vor		$xC2,$xC2,$t1		# 0xc2....01
++	vspltb		$t1,$H,0		# most significant byte
++	vsl		$H,$H,$t0		# H<<=1
++	vsrab		$t1,$t1,$t2		# broadcast carry bit
++	vand		$t1,$t1,$xC2
++	vxor		$H,$H,$t1		# twisted H
++
++	vsldoi		$H,$H,$H,8		# twist even more ...
++	vsldoi		$xC2,$zero,$xC2,8	# 0xc2.0
++	vsldoi		$Hl,$zero,$H,8		# ... and split
++	vsldoi		$Hh,$H,$zero,8
++
++	stvx_u		$xC2,0,r3		# save pre-computed table
++	stvx_u		$Hl,r8,r3
++	stvx_u		$H, r9,r3
++	stvx_u		$Hh,r10,r3
++
++	mtspr		256,$vrsave
++	blr
++	.long		0
++	.byte		0,12,0x14,0,0,0,2,0
++	.long		0
++.size	.gcm_init_p8,.-.gcm_init_p8
++
++.globl	.gcm_gmult_p8
++.align	5
++.gcm_gmult_p8:
++	lis		r0,0xfff8
++	li		r8,0x10
++	mfspr		$vrsave,256
++	li		r9,0x20
++	mtspr		256,r0
++	li		r10,0x30
++	lvx_u		$IN,0,$Xip		# load Xi
++
++	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
++	 le?lvsl	$lemask,r0,r0
++	lvx_u		$H, r9,$Htbl
++	 le?vspltisb	$t0,0x07
++	lvx_u		$Hh,r10,$Htbl
++	 le?vxor	$lemask,$lemask,$t0
++	lvx_u		$xC2,0,$Htbl
++	 le?vperm	$IN,$IN,$IN,$lemask
++	vxor		$zero,$zero,$zero
++
++	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
++	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
++	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
++
++	vpmsumd		$t2,$Xl,$xC2		# 1st phase
++
++	vsldoi		$t0,$Xm,$zero,8
++	vsldoi		$t1,$zero,$Xm,8
++	vxor		$Xl,$Xl,$t0
++	vxor		$Xh,$Xh,$t1
++
++	vsldoi		$Xl,$Xl,$Xl,8
++	vxor		$Xl,$Xl,$t2
++
++	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
++	vpmsumd		$Xl,$Xl,$xC2
++	vxor		$t1,$t1,$Xh
++	vxor		$Xl,$Xl,$t1
++
++	le?vperm	$Xl,$Xl,$Xl,$lemask
++	stvx_u		$Xl,0,$Xip		# write out Xi
++
++	mtspr		256,$vrsave
++	blr
++	.long		0
++	.byte		0,12,0x14,0,0,0,2,0
++	.long		0
++.size	.gcm_gmult_p8,.-.gcm_gmult_p8
++
++.globl	.gcm_ghash_p8
++.align	5
++.gcm_ghash_p8:
++	lis		r0,0xfff8
++	li		r8,0x10
++	mfspr		$vrsave,256
++	li		r9,0x20
++	mtspr		256,r0
++	li		r10,0x30
++	lvx_u		$Xl,0,$Xip		# load Xi
++
++	lvx_u		$Hl,r8,$Htbl		# load pre-computed table
++	 le?lvsl	$lemask,r0,r0
++	lvx_u		$H, r9,$Htbl
++	 le?vspltisb	$t0,0x07
++	lvx_u		$Hh,r10,$Htbl
++	 le?vxor	$lemask,$lemask,$t0
++	lvx_u		$xC2,0,$Htbl
++	 le?vperm	$Xl,$Xl,$Xl,$lemask
++	vxor		$zero,$zero,$zero
++
++	lvx_u		$IN,0,$inp
++	addi		$inp,$inp,16
++	subi		$len,$len,16
++	 le?vperm	$IN,$IN,$IN,$lemask
++	vxor		$IN,$IN,$Xl
++	b		Loop
++
++.align	5
++Loop:
++	 subic		$len,$len,16
++	vpmsumd		$Xl,$IN,$Hl		# H.lo·Xi.lo
++	 subfe.		r0,r0,r0		# borrow?-1:0
++	vpmsumd		$Xm,$IN,$H		# H.hi·Xi.lo+H.lo·Xi.hi
++	 and		r0,r0,$len
++	vpmsumd		$Xh,$IN,$Hh		# H.hi·Xi.hi
++	 add		$inp,$inp,r0
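++	# r0 stays 0 until $len-=16 borrows; on the final pass -1&$len
++	# (negative) rewinds $inp so the speculative load of the next
++	# block below never reads past the end of the input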
++
++	vpmsumd		$t2,$Xl,$xC2		# 1st phase
++
++	vsldoi		$t0,$Xm,$zero,8
++	vsldoi		$t1,$zero,$Xm,8
++	vxor		$Xl,$Xl,$t0
++	vxor		$Xh,$Xh,$t1
++
++	vsldoi		$Xl,$Xl,$Xl,8
++	vxor		$Xl,$Xl,$t2
++	 lvx_u		$IN,0,$inp
++	 addi		$inp,$inp,16
++
++	vsldoi		$t1,$Xl,$Xl,8		# 2nd phase
++	vpmsumd		$Xl,$Xl,$xC2
++	 le?vperm	$IN,$IN,$IN,$lemask
++	vxor		$t1,$t1,$Xh
++	vxor		$IN,$IN,$t1
++	vxor		$IN,$IN,$Xl
++	beq		Loop			# did $len-=16 borrow?
++
++	vxor		$Xl,$Xl,$t1
++	le?vperm	$Xl,$Xl,$Xl,$lemask
++	stvx_u		$Xl,0,$Xip		# write out Xi
++
++	mtspr		256,$vrsave
++	blr
++	.long		0
++	.byte		0,12,0x14,0,0,0,4,0
++	.long		0
++.size	.gcm_ghash_p8,.-.gcm_ghash_p8
++
++.asciz  "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
++.align  2
++___
++
++foreach (split("\n",$code)) {
++	if ($flavour =~ /le$/o) {	# little-endian
++	    s/le\?//o		or
++	    s/be\?/#be#/o;
++	} else {
++	    s/le\?/#le#/o	or
++	    s/be\?//o;
++	}
++	print $_,"\n";
++}
++
++close STDOUT; # enforce flush
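
The arithmetic core of the module above is vpmsumd, a carry-less multiply:
the "H.lo·Xi.lo" comments denote 64x64-bit polynomial products over GF(2),
which is what GHASH field multiplication is built from. A scaled-down C
model of one such product (8x8 bits for brevity; illustration only):

    #include <stdint.h>

    /* carry-less multiply: partial products combine with XOR, not add */
    static uint16_t clmul8(uint8_t a, uint8_t b)
    {
        uint16_t r = 0;
        for (int i = 0; i < 8; i++)
            if (b & (1u << i))
                r ^= (uint16_t)a << i;   /* XOR in the shifted multiplicand */
        return r;
    }
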
+diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c
+index 0e6ff8b..6f8e7ee 100644
+--- a/crypto/modes/gcm128.c
++++ b/crypto/modes/gcm128.c
+@@ -671,6 +671,21 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
+ void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
+ void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+ #  endif
++# elif defined(__sparc__) || defined(__sparc)
++#  include "sparc_arch.h"
++#  define GHASH_ASM_SPARC
++#  define GCM_FUNCREF_4BIT
++extern unsigned int OPENSSL_sparcv9cap_P[];
++void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
++void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
++void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
++# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
++#  include "ppc_arch.h"
++#  define GHASH_ASM_PPC
++#  define GCM_FUNCREF_4BIT
++void gcm_init_p8(u128 Htable[16],const u64 Xi[2]);
++void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]);
++void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
+ # endif
+ #endif
+ 
+@@ -747,6 +762,16 @@ void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
+ 		ctx->gmult = gcm_gmult_4bit;
+ 		ctx->ghash = gcm_ghash_4bit;
+ 	}
++# elif	defined(GHASH_ASM_PPC)
++	if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
++		gcm_init_p8(ctx->Htable,ctx->H.u);
++		ctx->gmult = gcm_gmult_p8;
++		ctx->ghash = gcm_ghash_p8;
++	} else {
++		gcm_init_4bit(ctx->Htable,ctx->H.u);
++		ctx->gmult = gcm_gmult_4bit;
++		ctx->ghash = gcm_ghash_4bit;
++	}
+ # else
+ 	gcm_init_4bit(ctx->Htable,ctx->H.u);
+ # endif
+diff --git a/crypto/perlasm/ppc-xlate.pl b/crypto/perlasm/ppc-xlate.pl
+index a3edd98..f89e814 100755
+--- a/crypto/perlasm/ppc-xlate.pl
++++ b/crypto/perlasm/ppc-xlate.pl
+@@ -27,7 +27,8 @@ my $globl = sub {
+ 	/osx/		&& do { $name = "_$name";
+ 				last;
+ 			      };
+-	/linux.*32/	&& do {	$ret .= ".globl	$name\n";
++	/linux.*(32|64le)/
++			&& do {	$ret .= ".globl	$name\n";
+ 				$ret .= ".type	$name,\@function";
+ 				last;
+ 			      };
+@@ -37,7 +38,6 @@ my $globl = sub {
+ 				$ret .= ".align	3\n";
+ 				$ret .= "$name:\n";
+ 				$ret .= ".quad	.$name,.TOC.\@tocbase,0\n";
+-				$ret .= ".size	$name,24\n";
+ 				$ret .= ".previous\n";
+ 
+ 				$name = ".$name";
+@@ -50,7 +50,9 @@ my $globl = sub {
+     $ret;
+ };
+ my $text = sub {
+-    ($flavour =~ /aix/) ? ".csect" : ".text";
++    my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
++    $ret = ".abiversion	2\n".$ret	if ($flavour =~ /linux.*64le/);
++    $ret;
+ };
+ my $machine = sub {
+     my $junk = shift;
+@@ -62,9 +64,12 @@ my $machine = sub {
+     ".machine	$arch";
+ };
+ my $size = sub {
+-    if ($flavour =~ /linux.*32/)
++    if ($flavour =~ /linux/)
+     {	shift;
+-	".size	" . join(",",@_);
++	my $name = shift; $name =~ s|^[\.\_]||;
++	my $ret  = ".size	$name,.-".($flavour=~/64$/?".":"").$name;
++	$ret .= "\n.size	.$name,.-.$name" if ($flavour=~/64$/);
++	$ret;
+     }
+     else
+     {	"";	}
+@@ -77,6 +82,25 @@ my $asciz = sub {
+     else
+     {	"";	}
+ };
++my $quad = sub {
++    shift;
++    my @ret;
++    my ($hi,$lo);
++    for (@_) {
++	if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
++	{  $hi=$1?"0x$1":"0"; $lo="0x$2";  }
++	elsif (/^([0-9]+)$/o)
++	{  $hi=$1>>32; $lo=$1&0xffffffff;  } # error-prone with 32-bit perl
++	else
++	{  $hi=undef; $lo=$_; }
++
++	if (defined($hi))
++	{  push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo");  }
++	else
++	{  push(@ret,".quad	$lo");  }
++    }
++    join("\n",@ret);
++};
+ 
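
The $quad helper above lets 64-bit constants assemble even with 32-bit-only
assemblers by splitting each value into two .long halves, low word first on
little-endian flavours. A C model of the split under that same convention:

    #include <stdint.h>
    #include <stdio.h>

    static void emit_quad(uint64_t v, int little_endian)
    {
        uint32_t hi = (uint32_t)(v >> 32), lo = (uint32_t)v;
        if (little_endian)
            printf(".long\t0x%08x,0x%08x\n", lo, hi);   /* lo word first */
        else
            printf(".long\t0x%08x,0x%08x\n", hi, lo);   /* hi word first */
    }
    /* emit_quad(0x428a2f98d728ae22ULL, 1) -> .long 0xd728ae22,0x428a2f98 */
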
+ ################################################################
+ # simplified mnemonics not handled by at least one assembler
+@@ -122,6 +146,46 @@ my $extrdi = sub {
+     $b = ($b+$n)&63; $n = 64-$n;
+     "	rldicl	$ra,$rs,$b,$n";
+ };
++my $vmr = sub {
++    my ($f,$vx,$vy) = @_;
++    "	vor	$vx,$vy,$vy";
++};
++
++# PowerISA 2.06 stuff
++sub vsxmem_op {
++    my ($f, $vrt, $ra, $rb, $op) = @_;
++    "	.long	".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
++}
++# made-up unaligned memory reference AltiVec/VMX instructions
++my $lvx_u	= sub {	vsxmem_op(@_, 844); };	# lxvd2x
++my $stvx_u	= sub {	vsxmem_op(@_, 972); };	# stxvd2x
++my $lvdx_u	= sub {	vsxmem_op(@_, 588); };	# lxsdx
++my $stvdx_u	= sub {	vsxmem_op(@_, 716); };	# stxsdx
++my $lvx_4w	= sub { vsxmem_op(@_, 780); };	# lxvw4x
++my $stvx_4w	= sub { vsxmem_op(@_, 908); };	# stxvw4x
++
++# PowerISA 2.07 stuff
++sub vcrypto_op {
++    my ($f, $vrt, $vra, $vrb, $op) = @_;
++    "	.long	".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
++}
++my $vcipher	= sub { vcrypto_op(@_, 1288); };
++my $vcipherlast	= sub { vcrypto_op(@_, 1289); };
++my $vncipher	= sub { vcrypto_op(@_, 1352); };
++my $vncipherlast= sub { vcrypto_op(@_, 1353); };
++my $vsbox	= sub { vcrypto_op(@_, 0, 1480); };
++my $vshasigmad	= sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
++my $vshasigmaw	= sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
++my $vpmsumb	= sub { vcrypto_op(@_, 1032); };
++my $vpmsumd	= sub { vcrypto_op(@_, 1224); };
++my $vpmsubh	= sub { vcrypto_op(@_, 1096); };
++my $vpmsumw	= sub { vcrypto_op(@_, 1160); };
++my $vaddudm	= sub { vcrypto_op(@_, 192);  };
++
++my $mtsle	= sub {
++    my ($f, $arg) = @_;
++    "	.long	".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
++};
+ 
+ while($line=<>) {
+ 
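
The vsxmem_op/vcrypto_op helpers above hand-encode PowerISA 2.06/2.07
instructions as raw .long words so the module still assembles with older
toolchains. A C transcription of the vcrypto_op field layout, with one
worked value (register fields and extended opcode as in the perl above):

    #include <stdint.h>

    static uint32_t vcrypto_op(unsigned vrt, unsigned vra, unsigned vrb,
                               unsigned xop)
    {
        /* primary opcode 4, then VRT/VRA/VRB fields, then extended opcode */
        return (4u << 26) | (vrt << 21) | (vra << 16) | (vrb << 11) | xop;
    }
    /* vcrypto_op(0, 1, 2, 1224) == 0x100114c8, i.e. "vpmsumd v0,v1,v2" */
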
+@@ -138,7 +202,10 @@ while($line=<>) {
+     {
+ 	$line =~ s|(^[\.\w]+)\:\s*||;
+ 	my $label = $1;
+-	printf "%s:",($GLOBALS{$label} or $label) if ($label);
++	if ($label) {
++	    printf "%s:",($GLOBALS{$label} or $label);
++	    printf "\n.localentry\t$GLOBALS{$label},0"	if ($GLOBALS{$label} && $flavour =~ /linux.*64le/);
++	}
+     }
+ 
+     {
+@@ -147,7 +214,7 @@ while($line=<>) {
+ 	my $mnemonic = $2;
+ 	my $f = $3;
+ 	my $opcode = eval("\$$mnemonic");
+-	$line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/);
++	$line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
+ 	if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+ 	elsif ($mnemonic)           { $line = $c.$mnemonic.$f."\t".$line; }
+     }
+diff --git a/crypto/ppc_arch.h b/crypto/ppc_arch.h
+new file mode 100644
+index 0000000..1192edf
+--- /dev/null
++++ b/crypto/ppc_arch.h
+@@ -0,0 +1,10 @@
++#ifndef __PPC_ARCH_H__
++#define __PPC_ARCH_H__
++
++extern unsigned int OPENSSL_ppccap_P;
++
++#define PPC_FPU64	(1<<0)
++#define PPC_ALTIVEC	(1<<1)
++#define PPC_CRYPTO207	(1<<2)
++
++#endif
+diff --git a/crypto/ppccap.c b/crypto/ppccap.c
+index f71ba66..13c2ca5 100644
+--- a/crypto/ppccap.c
++++ b/crypto/ppccap.c
+@@ -4,13 +4,15 @@
+ #include <setjmp.h>
+ #include <signal.h>
+ #include <unistd.h>
++#if defined(__linux) || defined(_AIX)
++#include <sys/utsname.h>
++#endif
+ #include <crypto.h>
+ #include <openssl/bn.h>
+ 
+-#define PPC_FPU64	(1<<0)
+-#define PPC_ALTIVEC	(1<<1)
++#include "ppc_arch.h"
+ 
+-static int OPENSSL_ppccap_P = 0;
++unsigned int OPENSSL_ppccap_P = 0;
+ 
+ static sigset_t all_masked;
+ 
+@@ -22,7 +24,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
+ 
+ 	if (sizeof(size_t)==4)
+ 		{
+-#if (defined(__APPLE__) && defined(__MACH__))
++#if 1 || (defined(__APPLE__) && defined(__MACH__))
+ 		if (num>=8 && (num&3)==0 && (OPENSSL_ppccap_P&PPC_FPU64))
+ 			return bn_mul_mont_fpu64(rp,ap,bp,np,n0,num);
+ #else
+@@ -50,11 +52,28 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
+ 	}
+ #endif
+ 
++void sha256_block_p8(void *ctx,const void *inp,size_t len);
++void sha256_block_ppc(void *ctx,const void *inp,size_t len);
++void sha256_block_data_order(void *ctx,const void *inp,size_t len)
++	{
++	OPENSSL_ppccap_P&PPC_CRYPTO207?	sha256_block_p8(ctx,inp,len):
++					sha256_block_ppc(ctx,inp,len);
++	}
++
++void sha512_block_p8(void *ctx,const void *inp,size_t len);
++void sha512_block_ppc(void *ctx,const void *inp,size_t len);
++void sha512_block_data_order(void *ctx,const void *inp,size_t len)
++	{
++	OPENSSL_ppccap_P&PPC_CRYPTO207?	sha512_block_p8(ctx,inp,len):
++					sha512_block_ppc(ctx,inp,len);
++	}
++
+ static sigjmp_buf ill_jmp;
+ static void ill_handler (int sig) { siglongjmp(ill_jmp,sig); }
+ 
+ void OPENSSL_ppc64_probe(void);
+ void OPENSSL_altivec_probe(void);
++void OPENSSL_crypto207_probe(void);
+ 
+ void OPENSSL_cpuid_setup(void)
+ 	{
+@@ -85,12 +104,14 @@ void OPENSSL_cpuid_setup(void)
+ 	OPENSSL_ppccap_P = 0;
+ 
+ #if defined(_AIX)
+-	if (sizeof(size_t)==4
++	if (sizeof(size_t)==4)
++		{
++		struct utsname uts;
+ # if defined(_SC_AIX_KERNEL_BITMODE)
+-	    && sysconf(_SC_AIX_KERNEL_BITMODE)!=64
++		if (sysconf(_SC_AIX_KERNEL_BITMODE)!=64)	return;
+ # endif
+-	   )
+-		return;
++		if (uname(&uts)!=0 || atoi(uts.version)<6)	return;
++		}
+ #endif
+ 
+ 	memset(&ill_act,0,sizeof(ill_act));
+@@ -102,6 +123,10 @@ void OPENSSL_cpuid_setup(void)
+ 
+ 	if (sizeof(size_t)==4)
+ 		{
++#ifdef __linux
++		struct utsname uts;
++		if (uname(&uts)==0 && strcmp(uts.machine,"ppc64")==0)
++#endif
+ 		if (sigsetjmp(ill_jmp,1) == 0)
+ 			{
+ 			OPENSSL_ppc64_probe();
+@@ -119,6 +144,11 @@ void OPENSSL_cpuid_setup(void)
+ 		{
+ 		OPENSSL_altivec_probe();
+ 		OPENSSL_ppccap_P |= PPC_ALTIVEC;
++		if (sigsetjmp(ill_jmp,1) == 0)
++			{
++			OPENSSL_crypto207_probe();
++			OPENSSL_ppccap_P |= PPC_CRYPTO207;
++			}
+ 		}
+ 
+ 	sigaction (SIGILL,&ill_oact,NULL);
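
The hunk above extends ppccap.c's established SIGILL-probe pattern to the
2.07 crypto category: execute one representative instruction (vcipher, via
OPENSSL_crypto207_probe) under a handler that longjmps out, and set the
capability bit only if the instruction survived. A condensed C sketch of
the pattern; the real code also masks signals and nests this probe inside
the AltiVec branch:

    #include <setjmp.h>
    #include <signal.h>
    #include <string.h>

    static sigjmp_buf ill_jmp;
    static void ill_handler(int sig) { siglongjmp(ill_jmp, sig); }

    void OPENSSL_crypto207_probe(void);   /* SIGILLs if vcipher is absent */

    static unsigned int probe_crypto207(void)
    {
        unsigned int cap = 0;
        struct sigaction ill_act, ill_oact;

        memset(&ill_act, 0, sizeof(ill_act));
        ill_act.sa_handler = ill_handler;
        sigaction(SIGILL, &ill_act, &ill_oact);
        if (sigsetjmp(ill_jmp, 1) == 0) {
            OPENSSL_crypto207_probe();
            cap |= (1 << 2);              /* PPC_CRYPTO207 */
        }
        sigaction(SIGILL, &ill_oact, NULL);
        return cap;
    }
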
+diff --git a/crypto/ppccpuid.pl b/crypto/ppccpuid.pl
+index 4ba736a..56cc851 100755
+--- a/crypto/ppccpuid.pl
++++ b/crypto/ppccpuid.pl
+@@ -31,6 +31,7 @@ $code=<<___;
+ 	blr
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,0,0
++.size	.OPENSSL_ppc64_probe,.-.OPENSSL_ppc64_probe
+ 
+ .globl	.OPENSSL_altivec_probe
+ .align	4
+@@ -39,6 +40,17 @@ $code=<<___;
+ 	blr
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,0,0
++.size	.OPENSSL_altivec_probe,.-.OPENSSL_altivec_probe
++
++.globl	.OPENSSL_crypto207_probe
++.align	4
++.OPENSSL_crypto207_probe:
++	lvx_u	v0,0,r1
++	vcipher	v0,v0,v0
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++.size	.OPENSSL_crypto207_probe,.-.OPENSSL_crypto207_probe
+ 
+ .globl	.OPENSSL_wipe_cpu
+ .align	4
+@@ -71,6 +83,7 @@ $code=<<___;
+ 	blr
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,0,0
++.size	.OPENSSL_wipe_cpu,.-.OPENSSL_wipe_cpu
+ 
+ .globl	.OPENSSL_atomic_add
+ .align	4
+@@ -84,6 +97,7 @@ Ladd:	lwarx	r5,0,r3
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,2,0
+ 	.long	0
++.size	.OPENSSL_atomic_add,.-.OPENSSL_atomic_add
+ 
+ .globl	.OPENSSL_rdtsc
+ .align	4
+@@ -93,6 +107,7 @@ Ladd:	lwarx	r5,0,r3
+ 	blr
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,0,0
++.size	.OPENSSL_rdtsc,.-.OPENSSL_rdtsc
+ 
+ .globl	.OPENSSL_cleanse
+ .align	4
+@@ -125,7 +140,99 @@ Laligned:
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,2,0
+ 	.long	0
++.size	.OPENSSL_cleanse,.-.OPENSSL_cleanse
++___
++{
++my ($out,$cnt,$max)=("r3","r4","r5");
++my ($tick,$lasttick)=("r6","r7");
++my ($diff,$lastdiff)=("r8","r9");
++
++$code.=<<___;
++.globl	.OPENSSL_instrument_bus
++.align	4
++.OPENSSL_instrument_bus:
++	mtctr	$cnt
++
++	mftb	$lasttick		# collect 1st tick
++	li	$diff,0
++
++	dcbf	0,$out			# flush cache line
++	lwarx	$tick,0,$out		# load and lock
++	add	$tick,$tick,$diff
++	stwcx.	$tick,0,$out
++	stwx	$tick,0,$out
++
++Loop:	mftb	$tick
++	sub	$diff,$tick,$lasttick
++	mr	$lasttick,$tick
++	dcbf	0,$out			# flush cache line
++	lwarx	$tick,0,$out		# load and lock
++	add	$tick,$tick,$diff
++	stwcx.	$tick,0,$out
++	stwx	$tick,0,$out
++	addi	$out,$out,4		# ++$out
++	bdnz	Loop
++
++	mr	r3,$cnt
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,2,0
++	.long	0
++.size	.OPENSSL_instrument_bus,.-.OPENSSL_instrument_bus
++
++.globl	.OPENSSL_instrument_bus2
++.align	4
++.OPENSSL_instrument_bus2:
++	mr	r0,$cnt
++	slwi	$cnt,$cnt,2
++
++	mftb	$lasttick		# collect 1st tick
++	li	$diff,0
++
++	dcbf	0,$out			# flush cache line
++	lwarx	$tick,0,$out		# load and lock
++	add	$tick,$tick,$diff
++	stwcx.	$tick,0,$out
++	stwx	$tick,0,$out
++
++	mftb	$tick			# collect 1st diff
++	sub	$diff,$tick,$lasttick
++	mr	$lasttick,$tick
++	mr	$lastdiff,$diff
++Loop2:
++	dcbf	0,$out			# flush cache line
++	lwarx	$tick,0,$out		# load and lock
++	add	$tick,$tick,$diff
++	stwcx.	$tick,0,$out
++	stwx	$tick,0,$out
++
++	addic.	$max,$max,-1
++	beq	Ldone2
++
++	mftb	$tick
++	sub	$diff,$tick,$lasttick
++	mr	$lasttick,$tick
++	cmplw	7,$diff,$lastdiff
++	mr	$lastdiff,$diff
++
++	mfcr	$tick			# pull cr
++	not	$tick,$tick		# flip bits
++	rlwinm	$tick,$tick,1,29,29	# isolate flipped eq bit and scale
++
++	sub.	$cnt,$cnt,$tick		# conditional --$cnt
++	add	$out,$out,$tick		# conditional ++$out
++	bne	Loop2
++
++Ldone2:
++	srwi	$cnt,$cnt,2
++	sub	r3,r0,$cnt
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,3,0
++	.long	0
++.size	.OPENSSL_instrument_bus2,.-.OPENSSL_instrument_bus2
+ ___
++}
+ 
+ $code =~ s/\`([^\`]*)\`/eval $1/gem;
+ print $code;
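
OPENSSL_instrument_bus and OPENSSL_instrument_bus2 above harvest timing
jitter: the timebase (mftb) is sampled around stores that go through a
deliberately flushed (dcbf) cache line, so bus and memory contention shows
up in the recorded deltas. A rough C model of the first routine;
read_timebase() and flush_line() are hypothetical stand-ins for the mftb
and dcbf instructions:

    #include <stddef.h>
    #include <stdint.h>

    uint64_t read_timebase(void);                  /* stands in for mftb */
    void flush_line(volatile unsigned int *p);     /* stands in for dcbf */

    static size_t instrument_bus(volatile unsigned int *out, size_t cnt)
    {
        uint64_t last = read_timebase();
        unsigned int diff = 0;

        for (size_t i = 0; i < cnt; i++) {
            flush_line(&out[i]);       /* force the update to miss cache */
            out[i] += diff;            /* models the lwarx/stwcx. sequence */
            uint64_t tick = read_timebase();
            diff = (unsigned int)(tick - last);
            last = tick;
        }
        return cnt;
    }
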
+diff --git a/crypto/sha/Makefile b/crypto/sha/Makefile
+index 6d191d3..58c6705 100644
+--- a/crypto/sha/Makefile
++++ b/crypto/sha/Makefile
+@@ -73,6 +73,8 @@ sha512-sparcv9.s:asm/sha512-sparcv9.pl;	$(PERL) asm/sha512-sparcv9.pl $@ $(CFLAG
+ sha1-ppc.s:	asm/sha1-ppc.pl;	$(PERL) asm/sha1-ppc.pl $(PERLASM_SCHEME) $@
+ sha256-ppc.s:	asm/sha512-ppc.pl;	$(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
+ sha512-ppc.s:	asm/sha512-ppc.pl;	$(PERL) asm/sha512-ppc.pl $(PERLASM_SCHEME) $@
++sha256p8-ppc.s:	asm/sha512p8-ppc.pl;	$(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@
++sha512p8-ppc.s:	asm/sha512p8-ppc.pl;	$(PERL) asm/sha512p8-ppc.pl $(PERLASM_SCHEME) $@
+ 
+ sha1-parisc.s:	asm/sha1-parisc.pl;	$(PERL) asm/sha1-parisc.pl $(PERLASM_SCHEME) $@
+ sha256-parisc.s:asm/sha512-parisc.pl;	$(PERL) asm/sha512-parisc.pl $(PERLASM_SCHEME) $@
+diff --git a/crypto/sha/asm/sha1-ppc.pl b/crypto/sha/asm/sha1-ppc.pl
+index 2140dd2..df59896 100755
+--- a/crypto/sha/asm/sha1-ppc.pl
++++ b/crypto/sha/asm/sha1-ppc.pl
+@@ -9,8 +9,7 @@
+ 
+ # I let hardware handle unaligned input(*), except on page boundaries
+ # (see below for details). Otherwise straightforward implementation
+-# with X vector in register bank. The module is big-endian [which is
+-# not big deal as there're no little-endian targets left around].
++# with X vector in register bank.
+ #
+ # (*) this means that this module is inappropriate for PPC403? Does
+ #     anybody know if pre-POWER3 can sustain unaligned load?
+@@ -38,6 +37,10 @@ if ($flavour =~ /64/) {
+ 	$PUSH	="stw";
+ } else { die "nonsense $flavour"; }
+ 
++# Define endianness based on flavour
++# i.e.: linux64le
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+@@ -68,14 +71,28 @@ $T  ="r12";
+ @X=("r16","r17","r18","r19","r20","r21","r22","r23",
+     "r24","r25","r26","r27","r28","r29","r30","r31");
+ 
++sub loadbe {
++my ($dst, $src, $temp_reg) = @_;
++$code.=<<___ if (!$LITTLE_ENDIAN);
++	lwz	$dst,$src
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++	lwz	$temp_reg,$src
++	rotlwi	$dst,$temp_reg,8
++	rlwimi	$dst,$temp_reg,24,0,7
++	rlwimi	$dst,$temp_reg,24,16,23
++___
++}
++
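
loadbe above keeps the SHA-1 message loads big-endian on little-endian
flavours: the rotlwi/rlwimi triple assembles a 32-bit byte swap. A C model
of the exact bit manipulation:

    #include <stdint.h>

    static uint32_t rotl32(uint32_t x, unsigned n)
    {
        return (x << n) | (x >> (32 - n));
    }

    static uint32_t loadbe32(uint32_t t)   /* t: word as fetched by lwz */
    {
        uint32_t d = rotl32(t, 8);                             /* rotlwi dst,t,8        */
        d = (d & 0x00ffffffu) | (rotl32(t, 24) & 0xff000000u); /* rlwimi dst,t,24,0,7   */
        d = (d & 0xffff00ffu) | (rotl32(t, 24) & 0x0000ff00u); /* rlwimi dst,t,24,16,23 */
        return d;                          /* equivalent to a full byte swap */
    }
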
+ sub BODY_00_19 {
+ my ($i,$a,$b,$c,$d,$e,$f)=@_;
+ my $j=$i+1;
+-$code.=<<___ if ($i==0);
+-	lwz	@X[$i],`$i*4`($inp)
+-___
++
++	# Since the last value of $f is discarded, we can use
++	# it as a temp reg to swap byte-order when needed.
++	loadbe("@X[$i]","`$i*4`($inp)",$f) if ($i==0);
++	loadbe("@X[$j]","`$j*4`($inp)",$f) if ($i<15);
+ $code.=<<___ if ($i<15);
+-	lwz	@X[$j],`$j*4`($inp)
+ 	add	$f,$K,$e
+ 	rotlwi	$e,$a,5
+ 	add	$f,$f,@X[$i]
+@@ -108,31 +125,31 @@ my ($i,$a,$b,$c,$d,$e,$f)=@_;
+ my $j=$i+1;
+ $code.=<<___ if ($i<79);
+ 	add	$f,$K,$e
++	xor	$t0,$b,$d
+ 	rotlwi	$e,$a,5
+ 	xor	@X[$j%16],@X[$j%16],@X[($j+2)%16]
+ 	add	$f,$f,@X[$i%16]
+-	xor	$t0,$b,$c
++	xor	$t0,$t0,$c
+ 	xor	@X[$j%16],@X[$j%16],@X[($j+8)%16]
+-	add	$f,$f,$e
++	add	$f,$f,$t0
+ 	rotlwi	$b,$b,30
+-	xor	$t0,$t0,$d
+ 	xor	@X[$j%16],@X[$j%16],@X[($j+13)%16]
+-	add	$f,$f,$t0
++	add	$f,$f,$e
+ 	rotlwi	@X[$j%16],@X[$j%16],1
+ ___
+ $code.=<<___ if ($i==79);
+ 	add	$f,$K,$e
++	xor	$t0,$b,$d
+ 	rotlwi	$e,$a,5
+ 	lwz	r16,0($ctx)
+ 	add	$f,$f,@X[$i%16]
+-	xor	$t0,$b,$c
++	xor	$t0,$t0,$c
+ 	lwz	r17,4($ctx)
+-	add	$f,$f,$e
++	add	$f,$f,$t0
+ 	rotlwi	$b,$b,30
+ 	lwz	r18,8($ctx)
+-	xor	$t0,$t0,$d
+ 	lwz	r19,12($ctx)
+-	add	$f,$f,$t0
++	add	$f,$f,$e
+ 	lwz	r20,16($ctx)
+ ___
+ }
+@@ -316,6 +333,7 @@ $code.=<<___;
+ 	blr
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,0,0
++.size	.sha1_block_data_order,.-.sha1_block_data_order
+ ___
+ $code.=<<___;
+ .asciz	"SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
+diff --git a/crypto/sha/asm/sha512-ppc.pl b/crypto/sha/asm/sha512-ppc.pl
+index 6b44a68..734f3c1 100755
+--- a/crypto/sha/asm/sha512-ppc.pl
++++ b/crypto/sha/asm/sha512-ppc.pl
+@@ -1,7 +1,7 @@
+ #!/usr/bin/env perl
+ 
+ # ====================================================================
+-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+ # project. The module is, however, dual licensed under OpenSSL and
+ # CRYPTOGAMS licenses depending on where you obtain it. For further
+ # details see http://www.openssl.org/~appro/cryptogams/.
+@@ -9,8 +9,7 @@
+ 
+ # I let hardware handle unaligned input, except on page boundaries
+ # (see below for details). Otherwise straightforward implementation
+-# with X vector in register bank. The module is big-endian [which is
+-# not big deal as there're no little-endian targets left around].
++# with X vector in register bank.
+ 
+ #			sha256		|	sha512
+ # 			-m64	-m32	|	-m64	-m32
+@@ -56,6 +55,8 @@ if ($flavour =~ /64/) {
+ 	$PUSH="stw";
+ } else { die "nonsense $flavour"; }
+ 
++$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
++
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+@@ -64,7 +65,7 @@ die "can't locate ppc-xlate.pl";
+ open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
+ 
+ if ($output =~ /512/) {
+-	$func="sha512_block_data_order";
++	$func="sha512_block_ppc";
+ 	$SZ=8;
+ 	@Sigma0=(28,34,39);
+ 	@Sigma1=(14,18,41);
+@@ -76,7 +77,7 @@ if ($output =~ /512/) {
+ 	$ROR="rotrdi";
+ 	$SHR="srdi";
+ } else {
+-	$func="sha256_block_data_order";
++	$func="sha256_block_ppc";
+ 	$SZ=4;
+ 	@Sigma0=( 2,13,22);
+ 	@Sigma1=( 6,11,25);
+@@ -110,7 +111,7 @@ $B  ="r9";
+ $C  ="r10";
+ $D  ="r11";
+ $E  ="r12";
+-$F  ="r13";	$F="r2" if ($SIZE_T==8);# reassigned to exempt TLS pointer
++$F  =$t1;	$t1 = "r0";	# stay away from "r13";
+ $G  ="r14";
+ $H  ="r15";
+ 
+@@ -118,24 +119,23 @@ $H  ="r15";
+ @X=("r16","r17","r18","r19","r20","r21","r22","r23",
+     "r24","r25","r26","r27","r28","r29","r30","r31");
+ 
+-$inp="r31";	# reassigned $inp! aliases with @X[15]
++$inp="r31" if($SZ==4 || $SIZE_T==8);	# reassigned $inp! aliases with @X[15]
+ 
+ sub ROUND_00_15 {
+ my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
+ $code.=<<___;
+-	$LD	$T,`$i*$SZ`($Tbl)
+ 	$ROR	$a0,$e,$Sigma1[0]
+ 	$ROR	$a1,$e,$Sigma1[1]
+ 	and	$t0,$f,$e
+-	andc	$t1,$g,$e
+-	add	$T,$T,$h
+ 	xor	$a0,$a0,$a1
++	add	$h,$h,$t1
++	andc	$t1,$g,$e
+ 	$ROR	$a1,$a1,`$Sigma1[2]-$Sigma1[1]`
+ 	or	$t0,$t0,$t1		; Ch(e,f,g)
+-	add	$T,$T,@X[$i]
++	add	$h,$h,@X[$i%16]
+ 	xor	$a0,$a0,$a1		; Sigma1(e)
+-	add	$T,$T,$t0
+-	add	$T,$T,$a0
++	add	$h,$h,$t0
++	add	$h,$h,$a0
+ 
+ 	$ROR	$a0,$a,$Sigma0[0]
+ 	$ROR	$a1,$a,$Sigma0[1]
+@@ -146,9 +146,14 @@ $code.=<<___;
+ 	xor	$t0,$t0,$t1
+ 	and	$t1,$b,$c
+ 	xor	$a0,$a0,$a1		; Sigma0(a)
+-	add	$d,$d,$T
++	add	$d,$d,$h
+ 	xor	$t0,$t0,$t1		; Maj(a,b,c)
+-	add	$h,$T,$a0
++___
++$code.=<<___ if ($i<15);
++	$LD	$t1,`($i+1)*$SZ`($Tbl)
++___
++$code.=<<___;
++	add	$h,$h,$a0
+ 	add	$h,$h,$t0
+ 
+ ___
+@@ -169,10 +174,11 @@ $code.=<<___;
+ 	add	@X[$i],@X[$i],@X[($i+9)%16]
+ 	xor	$a0,$a0,$a1		; sigma0(X[(i+1)&0x0f])
+ 	xor	$t0,$t0,$t1		; sigma1(X[(i+14)&0x0f])
++	$LD	$t1,`$i*$SZ`($Tbl)
+ 	add	@X[$i],@X[$i],$a0
+ 	add	@X[$i],@X[$i],$t0
+ ___
+-&ROUND_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h);
++&ROUND_00_15($i+16,$a,$b,$c,$d,$e,$f,$g,$h);
+ }
+ 
+ $code=<<___;
+@@ -188,8 +194,6 @@ $func:
+ 
+ 	$PUSH	$ctx,`$FRAME-$SIZE_T*22`($sp)
+ 
+-	$PUSH	$toc,`$FRAME-$SIZE_T*20`($sp)
+-	$PUSH	r13,`$FRAME-$SIZE_T*19`($sp)
+ 	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
+ 	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
+ 	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -209,7 +213,10 @@ $func:
+ 	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
+ 	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
+ 	$PUSH	r0,`$FRAME+$LRSAVE`($sp)
++___
+ 
++if ($SZ==4 || $SIZE_T==8) {
++$code.=<<___;
+ 	$LD	$A,`0*$SZ`($ctx)
+ 	mr	$inp,r4				; incarnate $inp
+ 	$LD	$B,`1*$SZ`($ctx)
+@@ -219,7 +226,16 @@ $func:
+ 	$LD	$F,`5*$SZ`($ctx)
+ 	$LD	$G,`6*$SZ`($ctx)
+ 	$LD	$H,`7*$SZ`($ctx)
++___
++} else {
++  for ($i=16;$i<32;$i++) {
++    $code.=<<___;
++	lwz	r$i,`$LITTLE_ENDIAN^(4*($i-16))`($ctx)
++___
++  }
++}
+ 
++$code.=<<___;
+ 	bl	LPICmeup
+ LPICedup:
+ 	andi.	r0,$inp,3
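
The `$LITTLE_ENDIAN^offset` expressions above (and throughout the 32-bit
SHA-512 path below) address the 32-bit halves of 64-bit state words: the
high half sits at byte offset 0 on big-endian but offset 4 on little-endian,
so XOR-ing the word offset with $LITTLE_ENDIAN (4 on LE builds, 0 otherwise)
selects the same logical half either way. A C model under those assumptions:

    #include <stdint.h>
    #include <string.h>

    /* word_off: 0 for the high half, 4 for the low half, in big-endian
     * terms; le_mask: 4 on little-endian builds, 0 otherwise */
    static uint32_t load_half(const uint64_t *v, unsigned word_off,
                              unsigned le_mask)
    {
        uint32_t w;
        memcpy(&w, (const unsigned char *)v + (word_off ^ le_mask),
               sizeof(w));
        return w;
    }
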
+@@ -255,6 +271,9 @@ Lunaligned:
+ Lcross_page:
+ 	li	$t1,`16*$SZ/4`
+ 	mtctr	$t1
++___
++if ($SZ==4 || $SIZE_T==8) {
++$code.=<<___;
+ 	addi	r20,$sp,$LOCALS			; aligned spot below the frame
+ Lmemcpy:
+ 	lbz	r16,0($inp)
+@@ -268,7 +287,26 @@ Lmemcpy:
+ 	stb	r19,3(r20)
+ 	addi	r20,r20,4
+ 	bdnz	Lmemcpy
++___
++} else {
++$code.=<<___;
++	addi	r12,$sp,$LOCALS			; aligned spot below the frame
++Lmemcpy:
++	lbz	r8,0($inp)
++	lbz	r9,1($inp)
++	lbz	r10,2($inp)
++	lbz	r11,3($inp)
++	addi	$inp,$inp,4
++	stb	r8,0(r12)
++	stb	r9,1(r12)
++	stb	r10,2(r12)
++	stb	r11,3(r12)
++	addi	r12,r12,4
++	bdnz	Lmemcpy
++___
++}
+ 
++$code.=<<___;
+ 	$PUSH	$inp,`$FRAME-$SIZE_T*26`($sp)	; save real inp
+ 	addi	$t1,$sp,`$LOCALS+16*$SZ`	; fictitious end pointer
+ 	addi	$inp,$sp,$LOCALS		; fictitious inp pointer
+@@ -283,8 +321,6 @@ Lmemcpy:
+ 
+ Ldone:
+ 	$POP	r0,`$FRAME+$LRSAVE`($sp)
+-	$POP	$toc,`$FRAME-$SIZE_T*20`($sp)
+-	$POP	r13,`$FRAME-$SIZE_T*19`($sp)
+ 	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
+ 	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
+ 	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
+@@ -309,27 +345,48 @@ Ldone:
+ 	.long	0
+ 	.byte	0,12,4,1,0x80,18,3,0
+ 	.long	0
++___
+ 
++if ($SZ==4 || $SIZE_T==8) {
++$code.=<<___;
+ .align	4
+ Lsha2_block_private:
++	$LD	$t1,0($Tbl)
+ ___
+ for($i=0;$i<16;$i++) {
+-$code.=<<___ if ($SZ==4);
++$code.=<<___ if ($SZ==4 && !$LITTLE_ENDIAN);
+ 	lwz	@X[$i],`$i*$SZ`($inp)
+ ___
++$code.=<<___ if ($SZ==4 && $LITTLE_ENDIAN);
++	lwz	$a0,`$i*$SZ`($inp)
++	rotlwi	@X[$i],$a0,8
++	rlwimi	@X[$i],$a0,24,0,7
++	rlwimi	@X[$i],$a0,24,16,23
++___
+ # 64-bit loads are split into 2x32-bit ones, as the CPU can't handle
+ # unaligned 64-bit loads, only 32-bit ones...
+-$code.=<<___ if ($SZ==8);
++$code.=<<___ if ($SZ==8 && !$LITTLE_ENDIAN);
+ 	lwz	$t0,`$i*$SZ`($inp)
+ 	lwz	@X[$i],`$i*$SZ+4`($inp)
+ 	insrdi	@X[$i],$t0,32,0
+ ___
++$code.=<<___ if ($SZ==8 && $LITTLE_ENDIAN);
++	lwz	$a0,`$i*$SZ`($inp)
++	 lwz	$a1,`$i*$SZ+4`($inp)
++	rotlwi	$t0,$a0,8
++	 rotlwi	@X[$i],$a1,8
++	rlwimi	$t0,$a0,24,0,7
++	 rlwimi	@X[$i],$a1,24,0,7
++	rlwimi	$t0,$a0,24,16,23
++	 rlwimi	@X[$i],$a1,24,16,23
++	insrdi	@X[$i],$t0,32,0
++___
+ 	&ROUND_00_15($i,@V);
+ 	unshift(@V,pop(@V));
+ }
+ $code.=<<___;
+-	li	$T,`$rounds/16-1`
+-	mtctr	$T
++	li	$t0,`$rounds/16-1`
++	mtctr	$t0
+ .align	4
+ Lrounds:
+ 	addi	$Tbl,$Tbl,`16*$SZ`
+@@ -377,7 +434,282 @@ $code.=<<___;
+ 	blr
+ 	.long	0
+ 	.byte	0,12,0x14,0,0,0,0,0
++.size	$func,.-$func
++___
++} else {
++########################################################################
++# SHA512 for PPC32, X vector is off-loaded to stack...
++#
++#			|	sha512
++#			|	-m32
++# ----------------------+-----------------------
++# PPC74x0,gcc-4.0.1	|	+48%
++# POWER6,gcc-4.4.6	|	+124%(*)
++# POWER7,gcc-4.4.6	|	+79%(*)
++# e300,gcc-4.1.0	|	+167%
++#
++# (*)	~1/3 of -m64 result [and ~20% better than -m32 code generated
++#	by xlc-12.1]
++
++my $XOFF=$LOCALS;
++
++my @V=map("r$_",(16..31));	# A..H
++
++my ($s0,$s1,$t0,$t1,$t2,$t3,$a0,$a1,$a2,$a3)=map("r$_",(0,5,6,8..12,14,15));
++my ($x0,$x1)=("r3","r4");	# zaps $ctx and $inp
++
++sub ROUND_00_15_ppc32 {
++my ($i,	$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
++	$ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_;
++
++$code.=<<___;
++	lwz	$t2,`$SZ*($i%16)+($LITTLE_ENDIAN^4)`($Tbl)
++	 xor	$a0,$flo,$glo
++	lwz	$t3,`$SZ*($i%16)+($LITTLE_ENDIAN^0)`($Tbl)
++	 xor	$a1,$fhi,$ghi
++	addc	$hlo,$hlo,$t0			; h+=x[i]
++	stw	$t0,`$XOFF+0+$SZ*($i%16)`($sp)	; save x[i]
++
++	srwi	$s0,$elo,$Sigma1[0]
++	srwi	$s1,$ehi,$Sigma1[0]
++	 and	$a0,$a0,$elo
++	adde	$hhi,$hhi,$t1
++	 and	$a1,$a1,$ehi
++	stw	$t1,`$XOFF+4+$SZ*($i%16)`($sp)
++	srwi	$t0,$elo,$Sigma1[1]
++	srwi	$t1,$ehi,$Sigma1[1]
++	 addc	$hlo,$hlo,$t2			; h+=K512[i]
++	insrwi	$s0,$ehi,$Sigma1[0],0
++	insrwi	$s1,$elo,$Sigma1[0],0
++	 xor	$a0,$a0,$glo			; Ch(e,f,g)
++	 adde	$hhi,$hhi,$t3
++	 xor	$a1,$a1,$ghi
++	insrwi	$t0,$ehi,$Sigma1[1],0
++	insrwi	$t1,$elo,$Sigma1[1],0
++	 addc	$hlo,$hlo,$a0			; h+=Ch(e,f,g)
++	srwi	$t2,$ehi,$Sigma1[2]-32
++	srwi	$t3,$elo,$Sigma1[2]-32
++	xor	$s0,$s0,$t0
++	xor	$s1,$s1,$t1
++	insrwi	$t2,$elo,$Sigma1[2]-32,0
++	insrwi	$t3,$ehi,$Sigma1[2]-32,0
++	 xor	$a0,$alo,$blo			; a^b, b^c in next round
++	 adde	$hhi,$hhi,$a1
++	 xor	$a1,$ahi,$bhi
++	xor	$s0,$s0,$t2			; Sigma1(e)
++	xor	$s1,$s1,$t3
++
++	srwi	$t0,$alo,$Sigma0[0]
++	 and	$a2,$a2,$a0
++	 addc	$hlo,$hlo,$s0			; h+=Sigma1(e)
++	 and	$a3,$a3,$a1
++	srwi	$t1,$ahi,$Sigma0[0]
++	srwi	$s0,$ahi,$Sigma0[1]-32
++	 adde	$hhi,$hhi,$s1
++	srwi	$s1,$alo,$Sigma0[1]-32
++	insrwi	$t0,$ahi,$Sigma0[0],0
++	insrwi	$t1,$alo,$Sigma0[0],0
++	 xor	$a2,$a2,$blo			; Maj(a,b,c)
++	 addc	$dlo,$dlo,$hlo			; d+=h
++	 xor	$a3,$a3,$bhi
++	insrwi	$s0,$alo,$Sigma0[1]-32,0
++	insrwi	$s1,$ahi,$Sigma0[1]-32,0
++	 adde	$dhi,$dhi,$hhi
++	srwi	$t2,$ahi,$Sigma0[2]-32
++	srwi	$t3,$alo,$Sigma0[2]-32
++	xor	$s0,$s0,$t0
++	 addc	$hlo,$hlo,$a2			; h+=Maj(a,b,c)
++	xor	$s1,$s1,$t1
++	insrwi	$t2,$alo,$Sigma0[2]-32,0
++	insrwi	$t3,$ahi,$Sigma0[2]-32,0
++	 adde	$hhi,$hhi,$a3
++___
++$code.=<<___ if ($i>=15);
++	lwz	$t0,`$XOFF+0+$SZ*(($i+2)%16)`($sp)
++	lwz	$t1,`$XOFF+4+$SZ*(($i+2)%16)`($sp)
++___
++$code.=<<___ if ($i<15 && !$LITTLE_ENDIAN);
++	lwz	$t1,`$SZ*($i+1)+0`($inp)
++	lwz	$t0,`$SZ*($i+1)+4`($inp)
+ ___
++$code.=<<___ if ($i<15 && $LITTLE_ENDIAN);
++	lwz	$a2,`$SZ*($i+1)+0`($inp)
++	 lwz	$a3,`$SZ*($i+1)+4`($inp)
++	rotlwi	$t1,$a2,8
++	 rotlwi	$t0,$a3,8
++	rlwimi	$t1,$a2,24,0,7
++	 rlwimi	$t0,$a3,24,0,7
++	rlwimi	$t1,$a2,24,16,23
++	 rlwimi	$t0,$a3,24,16,23
++___
++$code.=<<___;
++	xor	$s0,$s0,$t2			; Sigma0(a)
++	xor	$s1,$s1,$t3
++	addc	$hlo,$hlo,$s0			; h+=Sigma0(a)
++	adde	$hhi,$hhi,$s1
++___
++$code.=<<___ if ($i==15);
++	lwz	$x0,`$XOFF+0+$SZ*(($i+1)%16)`($sp)
++	lwz	$x1,`$XOFF+4+$SZ*(($i+1)%16)`($sp)
++___
++}
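
The addc/adde pairs in ROUND_00_15_ppc32 above synthesize 64-bit additions
from 32-bit register halves, the carry out of the low words feeding the
high-word add. The same operation in C:

    #include <stdint.h>

    static void add64(uint32_t *hi, uint32_t *lo, uint32_t bhi, uint32_t blo)
    {
        uint32_t l = *lo + blo;        /* addc: generates the carry */
        *hi = *hi + bhi + (l < blo);   /* adde: consumes the carry */
        *lo = l;
    }
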
++sub ROUND_16_xx_ppc32 {
++my ($i,	$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
++	$ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo)=@_;
++
++$code.=<<___;
++	srwi	$s0,$t0,$sigma0[0]
++	srwi	$s1,$t1,$sigma0[0]
++	srwi	$t2,$t0,$sigma0[1]
++	srwi	$t3,$t1,$sigma0[1]
++	insrwi	$s0,$t1,$sigma0[0],0
++	insrwi	$s1,$t0,$sigma0[0],0
++	srwi	$a0,$t0,$sigma0[2]
++	insrwi	$t2,$t1,$sigma0[1],0
++	insrwi	$t3,$t0,$sigma0[1],0
++	insrwi	$a0,$t1,$sigma0[2],0
++	xor	$s0,$s0,$t2
++	 lwz	$t2,`$XOFF+0+$SZ*(($i+14)%16)`($sp)
++	srwi	$a1,$t1,$sigma0[2]
++	xor	$s1,$s1,$t3
++	 lwz	$t3,`$XOFF+4+$SZ*(($i+14)%16)`($sp)
++	xor	$a0,$a0,$s0
++	 srwi	$s0,$t2,$sigma1[0]
++	xor	$a1,$a1,$s1
++	 srwi	$s1,$t3,$sigma1[0]
++	addc	$x0,$x0,$a0			; x[i]+=sigma0(x[i+1])
++	 srwi	$a0,$t3,$sigma1[1]-32
++	insrwi	$s0,$t3,$sigma1[0],0
++	insrwi	$s1,$t2,$sigma1[0],0
++	adde	$x1,$x1,$a1
++	 srwi	$a1,$t2,$sigma1[1]-32
++
++	insrwi	$a0,$t2,$sigma1[1]-32,0
++	srwi	$t2,$t2,$sigma1[2]
++	insrwi	$a1,$t3,$sigma1[1]-32,0
++	insrwi	$t2,$t3,$sigma1[2],0
++	xor	$s0,$s0,$a0
++	 lwz	$a0,`$XOFF+0+$SZ*(($i+9)%16)`($sp)
++	srwi	$t3,$t3,$sigma1[2]
++	xor	$s1,$s1,$a1
++	 lwz	$a1,`$XOFF+4+$SZ*(($i+9)%16)`($sp)
++	xor	$s0,$s0,$t2
++	 addc	$x0,$x0,$a0			; x[i]+=x[i+9]
++	xor	$s1,$s1,$t3
++	 adde	$x1,$x1,$a1
++	addc	$x0,$x0,$s0			; x[i]+=sigma1(x[i+14])
++	adde	$x1,$x1,$s1
++___
++	($t0,$t1,$x0,$x1) = ($x0,$x1,$t0,$t1);
++	&ROUND_00_15_ppc32(@_);
++}
++
++$code.=<<___;
++.align	4
++Lsha2_block_private:
++___
++$code.=<<___ if (!$LITTLE_ENDIAN);
++	lwz	$t1,0($inp)
++	xor	$a2,@V[3],@V[5]		; B^C, magic seed
++	lwz	$t0,4($inp)
++	xor	$a3,@V[2],@V[4]
++___
++$code.=<<___ if ($LITTLE_ENDIAN);
++	lwz	$a1,0($inp)
++	xor	$a2,@V[3],@V[5]		; B^C, magic seed
++	lwz	$a0,4($inp)
++	xor	$a3,@V[2],@V[4]
++	rotlwi	$t1,$a1,8
++	 rotlwi	$t0,$a0,8
++	rlwimi	$t1,$a1,24,0,7
++	 rlwimi	$t0,$a0,24,0,7
++	rlwimi	$t1,$a1,24,16,23
++	 rlwimi	$t0,$a0,24,16,23
++___
++for($i=0;$i<16;$i++) {
++	&ROUND_00_15_ppc32($i,@V);
++	unshift(@V,pop(@V));	unshift(@V,pop(@V));
++	($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1);
++}
++$code.=<<___;
++	li	$a0,`$rounds/16-1`
++	mtctr	$a0
++.align	4
++Lrounds:
++	addi	$Tbl,$Tbl,`16*$SZ`
++___
++for(;$i<32;$i++) {
++	&ROUND_16_xx_ppc32($i,@V);
++	unshift(@V,pop(@V));	unshift(@V,pop(@V));
++	($a0,$a1,$a2,$a3) = ($a2,$a3,$a0,$a1);
++}
++$code.=<<___;
++	bdnz-	Lrounds
++
++	$POP	$ctx,`$FRAME-$SIZE_T*22`($sp)
++	$POP	$inp,`$FRAME-$SIZE_T*23`($sp)	; inp pointer
++	$POP	$num,`$FRAME-$SIZE_T*24`($sp)	; end pointer
++	subi	$Tbl,$Tbl,`($rounds-16)*$SZ`	; rewind Tbl
++
++	lwz	$t0,`$LITTLE_ENDIAN^0`($ctx)
++	lwz	$t1,`$LITTLE_ENDIAN^4`($ctx)
++	lwz	$t2,`$LITTLE_ENDIAN^8`($ctx)
++	lwz	$t3,`$LITTLE_ENDIAN^12`($ctx)
++	lwz	$a0,`$LITTLE_ENDIAN^16`($ctx)
++	lwz	$a1,`$LITTLE_ENDIAN^20`($ctx)
++	lwz	$a2,`$LITTLE_ENDIAN^24`($ctx)
++	addc	@V[1],@V[1],$t1
++	lwz	$a3,`$LITTLE_ENDIAN^28`($ctx)
++	adde	@V[0],@V[0],$t0
++	lwz	$t0,`$LITTLE_ENDIAN^32`($ctx)
++	addc	@V[3],@V[3],$t3
++	lwz	$t1,`$LITTLE_ENDIAN^36`($ctx)
++	adde	@V[2],@V[2],$t2
++	lwz	$t2,`$LITTLE_ENDIAN^40`($ctx)
++	addc	@V[5],@V[5],$a1
++	lwz	$t3,`$LITTLE_ENDIAN^44`($ctx)
++	adde	@V[4],@V[4],$a0
++	lwz	$a0,`$LITTLE_ENDIAN^48`($ctx)
++	addc	@V[7],@V[7],$a3
++	lwz	$a1,`$LITTLE_ENDIAN^52`($ctx)
++	adde	@V[6],@V[6],$a2
++	lwz	$a2,`$LITTLE_ENDIAN^56`($ctx)
++	addc	@V[9],@V[9],$t1
++	lwz	$a3,`$LITTLE_ENDIAN^60`($ctx)
++	adde	@V[8],@V[8],$t0
++	stw	@V[0],`$LITTLE_ENDIAN^0`($ctx)
++	stw	@V[1],`$LITTLE_ENDIAN^4`($ctx)
++	addc	@V[11],@V[11],$t3
++	stw	@V[2],`$LITTLE_ENDIAN^8`($ctx)
++	stw	@V[3],`$LITTLE_ENDIAN^12`($ctx)
++	adde	@V[10],@V[10],$t2
++	stw	@V[4],`$LITTLE_ENDIAN^16`($ctx)
++	stw	@V[5],`$LITTLE_ENDIAN^20`($ctx)
++	addc	@V[13],@V[13],$a1
++	stw	@V[6],`$LITTLE_ENDIAN^24`($ctx)
++	stw	@V[7],`$LITTLE_ENDIAN^28`($ctx)
++	adde	@V[12],@V[12],$a0
++	stw	@V[8],`$LITTLE_ENDIAN^32`($ctx)
++	stw	@V[9],`$LITTLE_ENDIAN^36`($ctx)
++	addc	@V[15],@V[15],$a3
++	stw	@V[10],`$LITTLE_ENDIAN^40`($ctx)
++	stw	@V[11],`$LITTLE_ENDIAN^44`($ctx)
++	adde	@V[14],@V[14],$a2
++	stw	@V[12],`$LITTLE_ENDIAN^48`($ctx)
++	stw	@V[13],`$LITTLE_ENDIAN^52`($ctx)
++	stw	@V[14],`$LITTLE_ENDIAN^56`($ctx)
++	stw	@V[15],`$LITTLE_ENDIAN^60`($ctx)
++
++	addi	$inp,$inp,`16*$SZ`		; advance inp
++	$PUSH	$inp,`$FRAME-$SIZE_T*23`($sp)
++	$UCMP	$inp,$num
++	bne	Lsha2_block_private
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++.size	$func,.-$func
++___
++}
+ 
+ # Ugly hack here, because PPC assembler syntax seem to vary too
+ # much from platforms to platform...
+@@ -395,46 +727,46 @@ LPICmeup:
+ 	.space	`64-9*4`
+ ___
+ $code.=<<___ if ($SZ==8);
+-	.long	0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
+-	.long	0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
+-	.long	0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
+-	.long	0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
+-	.long	0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
+-	.long	0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
+-	.long	0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
+-	.long	0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
+-	.long	0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
+-	.long	0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
+-	.long	0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
+-	.long	0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
+-	.long	0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
+-	.long	0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
+-	.long	0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
+-	.long	0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
+-	.long	0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
+-	.long	0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
+-	.long	0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
+-	.long	0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
+-	.long	0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
+-	.long	0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
+-	.long	0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
+-	.long	0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
+-	.long	0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
+-	.long	0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
+-	.long	0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
+-	.long	0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
+-	.long	0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
+-	.long	0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
+-	.long	0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
+-	.long	0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
+-	.long	0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
+-	.long	0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
+-	.long	0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
+-	.long	0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
+-	.long	0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
+-	.long	0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
+-	.long	0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
+-	.long	0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
++	.quad	0x428a2f98d728ae22,0x7137449123ef65cd
++	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
++	.quad	0x3956c25bf348b538,0x59f111f1b605d019
++	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
++	.quad	0xd807aa98a3030242,0x12835b0145706fbe
++	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
++	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
++	.quad	0x9bdc06a725c71235,0xc19bf174cf692694
++	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
++	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
++	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
++	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
++	.quad	0x983e5152ee66dfab,0xa831c66d2db43210
++	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
++	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
++	.quad	0x06ca6351e003826f,0x142929670a0e6e70
++	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
++	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
++	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
++	.quad	0x81c2c92e47edaee6,0x92722c851482353b
++	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
++	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
++	.quad	0xd192e819d6ef5218,0xd69906245565a910
++	.quad	0xf40e35855771202a,0x106aa07032bbd1b8
++	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
++	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
++	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
++	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
++	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
++	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
++	.quad	0x90befffa23631e28,0xa4506cebde82bde9
++	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
++	.quad	0xca273eceea26619c,0xd186b8c721c0c207
++	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
++	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
++	.quad	0x113f9804bef90dae,0x1b710b35131c471b
++	.quad	0x28db77f523047d84,0x32caab7b40c72493
++	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
++	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
++	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
+ ___
+ $code.=<<___ if ($SZ==4);
+ 	.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+diff --git a/crypto/sha/asm/sha512p8-ppc.pl b/crypto/sha/asm/sha512p8-ppc.pl
+new file mode 100755
+index 0000000..a316b31
+--- /dev/null
++++ b/crypto/sha/asm/sha512p8-ppc.pl
+@@ -0,0 +1,423 @@
++#!/usr/bin/env perl
++
++# ====================================================================
++# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
++# project. The module is, however, dual licensed under OpenSSL and
++# CRYPTOGAMS licenses depending on where you obtain it. For further
++# details see http://www.openssl.org/~appro/cryptogams/.
++# ====================================================================
++
++# SHA256/512 for PowerISA v2.07.
++#
++# Accurate performance measurements are problematic, because it's
++# always a virtualized setup with a possibly throttled processor.
++# Relative comparison is therefore more informative. This module is
++# ~60% faster than integer-only sha512-ppc.pl. To anchor it to something
++# else, SHA256 is 24% slower than sha1-ppc.pl and 2.5x slower than
++# hardware-assisted aes-128-cbc encrypt. SHA512 is 20% faster than
++# sha1-ppc.pl and 1.6x slower than aes-128-cbc. Another interesting
++# result is the degree of utilization of computational resources.
++# POWER8 is a "massively multi-threaded chip" and the difference
++# between single- and maximum multi-process benchmark results tells
++# us that utilization is a whopping 94%. For sha512-ppc.pl we get a
++# [not unimpressive] 84% and for sha1-ppc.pl - 73%. 100% means that
++# the multi-process result equals the single-process one, given that
++# all threads end up on the same physical core.
++
++$flavour=shift;
++$output =shift;
++
++if ($flavour =~ /64/) {
++	$SIZE_T=8;
++	$LRSAVE=2*$SIZE_T;
++	$STU="stdu";
++	$POP="ld";
++	$PUSH="std";
++} elsif ($flavour =~ /32/) {
++	$SIZE_T=4;
++	$LRSAVE=$SIZE_T;
++	$STU="stwu";
++	$POP="lwz";
++	$PUSH="stw";
++} else { die "nonsense $flavour"; }
++
++$LENDIAN=($flavour=~/le/);
++
++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
++( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
++( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
++die "can't locate ppc-xlate.pl";
++
++open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
++
++if ($output =~ /512/) {
++	$bits=512;
++	$SZ=8;
++	$sz="d";
++	$rounds=80;
++} else {
++	$bits=256;
++	$SZ=4;
++	$sz="w";
++	$rounds=64;
++}
++
++$func="sha${bits}_block_p8";
++$FRAME=8*$SIZE_T;
++
++$sp ="r1";
++$toc="r2";
++$ctx="r3";
++$inp="r4";
++$num="r5";
++$Tbl="r6";
++$idx="r7";
++$lrsave="r8";
++$offload="r11";
++$vrsave="r12";
++($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,10,26..31));
++
++@V=($A,$B,$C,$D,$E,$F,$G,$H)=map("v$_",(0..7));
++@X=map("v$_",(8..23));
++($Ki,$Func,$S0,$S1,$s0,$s1,$lemask)=map("v$_",(24..31));
++
++sub ROUND {
++my ($i,$a,$b,$c,$d,$e,$f,$g,$h)=@_;
++my $j=($i+1)%16;
++
++$code.=<<___		if ($i<15 && ($i%(16/$SZ))==(16/$SZ-1));
++	lvx_u		@X[$i+1],0,$inp		; load X[i] in advance
++	addi		$inp,$inp,16
++___
++$code.=<<___		if ($i<16 && ($i%(16/$SZ)));
++	vsldoi		@X[$i],@X[$i-1],@X[$i-1],$SZ
++___
++$code.=<<___		if ($LENDIAN && $i<16 && ($i%(16/$SZ))==0);
++	vperm		@X[$i],@X[$i],@X[$i],$lemask
++___
++$code.=<<___;
++	`"vshasigma${sz}	$s0,@X[($j+1)%16],0,0"		if ($i>=15)`
++	vsel		$Func,$g,$f,$e		; Ch(e,f,g)
++	vshasigma${sz}	$S1,$e,1,15		; Sigma1(e)
++	vaddu${sz}m	$h,$h,@X[$i%16]		; h+=X[i]
++	vshasigma${sz}	$S0,$a,1,0		; Sigma0(a)
++	`"vshasigma${sz}	$s1,@X[($j+14)%16],0,15"	if ($i>=15)`
++	vaddu${sz}m	$h,$h,$Func		; h+=Ch(e,f,g)
++	vxor		$Func,$a,$b
++	`"vaddu${sz}m		@X[$j],@X[$j],@X[($j+9)%16]"	if ($i>=15)`
++	vaddu${sz}m	$h,$h,$S1		; h+=Sigma1(e)
++	vsel		$Func,$b,$c,$Func	; Maj(a,b,c)
++	vaddu${sz}m	$g,$g,$Ki		; future h+=K[i]
++	vaddu${sz}m	$d,$d,$h		; d+=h
++	vaddu${sz}m	$S0,$S0,$Func		; Sigma0(a)+Maj(a,b,c)
++	`"vaddu${sz}m		@X[$j],@X[$j],$s0"		if ($i>=15)`
++	lvx		$Ki,$idx,$Tbl		; load next K[i]
++	addi		$idx,$idx,16
++	vaddu${sz}m	$h,$h,$S0		; h+=Sigma0(a)+Maj(a,b,c)
++	`"vaddu${sz}m		@X[$j],@X[$j],$s1"		if ($i>=15)`
++___
++}
++
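
ROUND above computes Ch and Maj with single vsel instructions: vsel takes
bits from one source where the selector is set and from the other where it
is clear, and Maj(a,b,c) falls out of selecting between b and c with a^b.
A scalar C model of both identities:

    #include <stdint.h>

    /* vsel semantics: bits of t where mask is set, bits of f elsewhere */
    static uint32_t sel(uint32_t mask, uint32_t t, uint32_t f)
    {
        return (t & mask) | (f & ~mask);
    }

    static uint32_t Ch(uint32_t e, uint32_t f, uint32_t g)
    {
        return sel(e, f, g);           /* vsel Func,g,f,e */
    }

    static uint32_t Maj(uint32_t a, uint32_t b, uint32_t c)
    {
        /* where a == b the majority is b; where they differ it is c */
        return sel(a ^ b, c, b);       /* vxor Func,a,b ; vsel Func,b,c,Func */
    }
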
++$code=<<___;
++.machine	"any"
++.text
++
++.globl	$func
++.align	6
++$func:
++	$STU		$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
++	mflr		$lrsave
++	li		r10,`$FRAME+8*16+15`
++	li		r11,`$FRAME+8*16+31`
++	stvx		v20,r10,$sp		# ABI says so
++	addi		r10,r10,32
++	mfspr		$vrsave,256
++	stvx		v21,r11,$sp
++	addi		r11,r11,32
++	stvx		v22,r10,$sp
++	addi		r10,r10,32
++	stvx		v23,r11,$sp
++	addi		r11,r11,32
++	stvx		v24,r10,$sp
++	addi		r10,r10,32
++	stvx		v25,r11,$sp
++	addi		r11,r11,32
++	stvx		v26,r10,$sp
++	addi		r10,r10,32
++	stvx		v27,r11,$sp
++	addi		r11,r11,32
++	stvx		v28,r10,$sp
++	addi		r10,r10,32
++	stvx		v29,r11,$sp
++	addi		r11,r11,32
++	stvx		v30,r10,$sp
++	stvx		v31,r11,$sp
++	li		r11,-1
++	stw		$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
++	li		$x10,0x10
++	$PUSH		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++	li		$x20,0x20
++	$PUSH		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++	li		$x30,0x30
++	$PUSH		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++	li		$x40,0x40
++	$PUSH		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++	li		$x50,0x50
++	$PUSH		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++	li		$x60,0x60
++	$PUSH		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++	li		$x70,0x70
++	$PUSH		$lrsave,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
++	mtspr		256,r11
++
++	bl		LPICmeup
++	addi		$offload,$sp,$FRAME+15
++___
++$code.=<<___		if ($LENDIAN);
++	li		$idx,8
++	lvsl		$lemask,0,$idx
++	vspltisb	$Ki,0x0f
++	vxor		$lemask,$lemask,$Ki
++___
++$code.=<<___		if ($SZ==4);
++	lvx_4w		$A,$x00,$ctx
++	lvx_4w		$E,$x10,$ctx
++	vsldoi		$B,$A,$A,4		# unpack
++	vsldoi		$C,$A,$A,8
++	vsldoi		$D,$A,$A,12
++	vsldoi		$F,$E,$E,4
++	vsldoi		$G,$E,$E,8
++	vsldoi		$H,$E,$E,12
++___
++$code.=<<___		if ($SZ==8);
++	lvx_u		$A,$x00,$ctx
++	lvx_u		$C,$x10,$ctx
++	lvx_u		$E,$x20,$ctx
++	vsldoi		$B,$A,$A,8		# unpack
++	lvx_u		$G,$x30,$ctx
++	vsldoi		$D,$C,$C,8
++	vsldoi		$F,$E,$E,8
++	vsldoi		$H,$G,$G,8
++___
++$code.=<<___;
++	li		r0,`($rounds-16)/16`	# inner loop counter
++	b		Loop
++.align	5
++Loop:
++	lvx		$Ki,$x00,$Tbl
++	li		$idx,16
++	lvx_u		@X[0],0,$inp
++	addi		$inp,$inp,16
++	stvx		$A,$x00,$offload	# offload $A-$H
++	stvx		$B,$x10,$offload
++	stvx		$C,$x20,$offload
++	stvx		$D,$x30,$offload
++	stvx		$E,$x40,$offload
++	stvx		$F,$x50,$offload
++	stvx		$G,$x60,$offload
++	stvx		$H,$x70,$offload
++	vaddu${sz}m	$H,$H,$Ki		# h+K[i]
++	lvx		$Ki,$idx,$Tbl
++	addi		$idx,$idx,16
++___
++for ($i=0;$i<16;$i++)	{ &ROUND($i,@V); unshift(@V,pop(@V)); }
++$code.=<<___;
++	mtctr		r0
++	b		L16_xx
++.align	5
++L16_xx:
++___
++for (;$i<32;$i++)	{ &ROUND($i,@V); unshift(@V,pop(@V)); }
++$code.=<<___;
++	bdnz		L16_xx
++
++	lvx		@X[2],$x00,$offload
++	subic.		$num,$num,1
++	lvx		@X[3],$x10,$offload
++	vaddu${sz}m	$A,$A,@X[2]
++	lvx		@X[4],$x20,$offload
++	vaddu${sz}m	$B,$B,@X[3]
++	lvx		@X[5],$x30,$offload
++	vaddu${sz}m	$C,$C,@X[4]
++	lvx		@X[6],$x40,$offload
++	vaddu${sz}m	$D,$D,@X[5]
++	lvx		@X[7],$x50,$offload
++	vaddu${sz}m	$E,$E,@X[6]
++	lvx		@X[8],$x60,$offload
++	vaddu${sz}m	$F,$F,@X[7]
++	lvx		@X[9],$x70,$offload
++	vaddu${sz}m	$G,$G,@X[8]
++	vaddu${sz}m	$H,$H,@X[9]
++	bne		Loop
++___
++$code.=<<___		if ($SZ==4);
++	lvx		@X[0],$idx,$Tbl
++	addi		$idx,$idx,16
++	vperm		$A,$A,$B,$Ki		# pack the answer
++	lvx		@X[1],$idx,$Tbl
++	vperm		$E,$E,$F,$Ki
++	vperm		$A,$A,$C,@X[0]
++	vperm		$E,$E,$G,@X[0]
++	vperm		$A,$A,$D,@X[1]
++	vperm		$E,$E,$H,@X[1]
++	stvx_4w		$A,$x00,$ctx
++	stvx_4w		$E,$x10,$ctx
++___
++$code.=<<___		if ($SZ==8);
++	vperm		$A,$A,$B,$Ki		# pack the answer
++	vperm		$C,$C,$D,$Ki
++	vperm		$E,$E,$F,$Ki
++	vperm		$G,$G,$H,$Ki
++	stvx_u		$A,$x00,$ctx
++	stvx_u		$C,$x10,$ctx
++	stvx_u		$E,$x20,$ctx
++	stvx_u		$G,$x30,$ctx
++___
++$code.=<<___;
++	li		r10,`$FRAME+8*16+15`
++	mtlr		$lrsave
++	li		r11,`$FRAME+8*16+31`
++	mtspr		256,$vrsave
++	lvx		v20,r10,$sp		# ABI says so
++	addi		r10,r10,32
++	lvx		v21,r11,$sp
++	addi		r11,r11,32
++	lvx		v22,r10,$sp
++	addi		r10,r10,32
++	lvx		v23,r11,$sp
++	addi		r11,r11,32
++	lvx		v24,r10,$sp
++	addi		r10,r10,32
++	lvx		v25,r11,$sp
++	addi		r11,r11,32
++	lvx		v26,r10,$sp
++	addi		r10,r10,32
++	lvx		v27,r11,$sp
++	addi		r11,r11,32
++	lvx		v28,r10,$sp
++	addi		r10,r10,32
++	lvx		v29,r11,$sp
++	addi		r11,r11,32
++	lvx		v30,r10,$sp
++	lvx		v31,r11,$sp
++	$POP		r26,`$FRAME+21*16+0*$SIZE_T`($sp)
++	$POP		r27,`$FRAME+21*16+1*$SIZE_T`($sp)
++	$POP		r28,`$FRAME+21*16+2*$SIZE_T`($sp)
++	$POP		r29,`$FRAME+21*16+3*$SIZE_T`($sp)
++	$POP		r30,`$FRAME+21*16+4*$SIZE_T`($sp)
++	$POP		r31,`$FRAME+21*16+5*$SIZE_T`($sp)
++	addi		$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
++	blr
++	.long		0
++	.byte		0,12,4,1,0x80,6,3,0
++	.long		0
++.size	$func,.-$func
++___
++
++# Ugly hack here, because PPC assembler syntax seems to vary too
++# much from platform to platform...
++$code.=<<___;
++.align	6
++LPICmeup:
++	mflr	r0
++	bcl	20,31,\$+4
++	mflr	$Tbl	; vvvvvv "distance" between . and 1st data entry
++	addi	$Tbl,$Tbl,`64-8`
++	mtlr	r0
++	blr
++	.long	0
++	.byte	0,12,0x14,0,0,0,0,0
++	.space	`64-9*4`
++___
++
++if ($SZ==8) {
++    local *table = sub {
++	foreach(@_) { $code.=".quad	$_,$_\n"; }
++    };
++    table(
++	"0x428a2f98d728ae22","0x7137449123ef65cd",
++	"0xb5c0fbcfec4d3b2f","0xe9b5dba58189dbbc",
++	"0x3956c25bf348b538","0x59f111f1b605d019",
++	"0x923f82a4af194f9b","0xab1c5ed5da6d8118",
++	"0xd807aa98a3030242","0x12835b0145706fbe",
++	"0x243185be4ee4b28c","0x550c7dc3d5ffb4e2",
++	"0x72be5d74f27b896f","0x80deb1fe3b1696b1",
++	"0x9bdc06a725c71235","0xc19bf174cf692694",
++	"0xe49b69c19ef14ad2","0xefbe4786384f25e3",
++	"0x0fc19dc68b8cd5b5","0x240ca1cc77ac9c65",
++	"0x2de92c6f592b0275","0x4a7484aa6ea6e483",
++	"0x5cb0a9dcbd41fbd4","0x76f988da831153b5",
++	"0x983e5152ee66dfab","0xa831c66d2db43210",
++	"0xb00327c898fb213f","0xbf597fc7beef0ee4",
++	"0xc6e00bf33da88fc2","0xd5a79147930aa725",
++	"0x06ca6351e003826f","0x142929670a0e6e70",
++	"0x27b70a8546d22ffc","0x2e1b21385c26c926",
++	"0x4d2c6dfc5ac42aed","0x53380d139d95b3df",
++	"0x650a73548baf63de","0x766a0abb3c77b2a8",
++	"0x81c2c92e47edaee6","0x92722c851482353b",
++	"0xa2bfe8a14cf10364","0xa81a664bbc423001",
++	"0xc24b8b70d0f89791","0xc76c51a30654be30",
++	"0xd192e819d6ef5218","0xd69906245565a910",
++	"0xf40e35855771202a","0x106aa07032bbd1b8",
++	"0x19a4c116b8d2d0c8","0x1e376c085141ab53",
++	"0x2748774cdf8eeb99","0x34b0bcb5e19b48a8",
++	"0x391c0cb3c5c95a63","0x4ed8aa4ae3418acb",
++	"0x5b9cca4f7763e373","0x682e6ff3d6b2b8a3",
++	"0x748f82ee5defb2fc","0x78a5636f43172f60",
++	"0x84c87814a1f0ab72","0x8cc702081a6439ec",
++	"0x90befffa23631e28","0xa4506cebde82bde9",
++	"0xbef9a3f7b2c67915","0xc67178f2e372532b",
++	"0xca273eceea26619c","0xd186b8c721c0c207",
++	"0xeada7dd6cde0eb1e","0xf57d4f7fee6ed178",
++	"0x06f067aa72176fba","0x0a637dc5a2c898a6",
++	"0x113f9804bef90dae","0x1b710b35131c471b",
++	"0x28db77f523047d84","0x32caab7b40c72493",
++	"0x3c9ebe0a15c9bebc","0x431d67c49c100d4c",
++	"0x4cc5d4becb3e42b6","0x597f299cfc657e2a",
++	"0x5fcb6fab3ad6faec","0x6c44198c4a475817","0");
++$code.=<<___	if (!$LENDIAN);
++.quad	0x0001020304050607,0x1011121314151617
++___
++$code.=<<___	if ($LENDIAN);	# quad-swapped
++.quad	0x1011121314151617,0x0001020304050607
++___
++} else {
++    local *table = sub {
++	foreach(@_) { $code.=".long	$_,$_,$_,$_\n"; }
++    };
++    table(
++	"0x428a2f98","0x71374491","0xb5c0fbcf","0xe9b5dba5",
++	"0x3956c25b","0x59f111f1","0x923f82a4","0xab1c5ed5",
++	"0xd807aa98","0x12835b01","0x243185be","0x550c7dc3",
++	"0x72be5d74","0x80deb1fe","0x9bdc06a7","0xc19bf174",
++	"0xe49b69c1","0xefbe4786","0x0fc19dc6","0x240ca1cc",
++	"0x2de92c6f","0x4a7484aa","0x5cb0a9dc","0x76f988da",
++	"0x983e5152","0xa831c66d","0xb00327c8","0xbf597fc7",
++	"0xc6e00bf3","0xd5a79147","0x06ca6351","0x14292967",
++	"0x27b70a85","0x2e1b2138","0x4d2c6dfc","0x53380d13",
++	"0x650a7354","0x766a0abb","0x81c2c92e","0x92722c85",
++	"0xa2bfe8a1","0xa81a664b","0xc24b8b70","0xc76c51a3",
++	"0xd192e819","0xd6990624","0xf40e3585","0x106aa070",
++	"0x19a4c116","0x1e376c08","0x2748774c","0x34b0bcb5",
++	"0x391c0cb3","0x4ed8aa4a","0x5b9cca4f","0x682e6ff3",
++	"0x748f82ee","0x78a5636f","0x84c87814","0x8cc70208",
++	"0x90befffa","0xa4506ceb","0xbef9a3f7","0xc67178f2","0");
++$code.=<<___	if (!$LENDIAN);
++.long	0x00010203,0x10111213,0x10111213,0x10111213
++.long	0x00010203,0x04050607,0x10111213,0x10111213
++.long	0x00010203,0x04050607,0x08090a0b,0x10111213
++___
++$code.=<<___	if ($LENDIAN);	# word-swapped
++.long	0x10111213,0x10111213,0x10111213,0x00010203
++.long	0x10111213,0x10111213,0x04050607,0x00010203
++.long	0x10111213,0x08090a0b,0x04050607,0x00010203
++___
++}
++$code.=<<___;
++.asciz	"SHA${bits} for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
++.align	2
++___
++
++$code =~ s/\`([^\`]*)\`/eval $1/gem;
++print $code;
++close STDOUT;
diff --git a/SOURCES/openssl-1.0.1e-rpmbuild.patch b/SOURCES/openssl-1.0.1e-rpmbuild.patch
new file mode 100644
index 0000000..14b2ba9
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-rpmbuild.patch
@@ -0,0 +1,112 @@
+diff -up openssl-1.0.1e/Configure.rpmbuild openssl-1.0.1e/Configure
+--- openssl-1.0.1e/Configure.rpmbuild	2014-08-13 19:19:53.211005598 +0200
++++ openssl-1.0.1e/Configure	2014-08-13 19:29:21.704099285 +0200
+@@ -345,24 +345,24 @@ my %table=(
+ ####
+ # *-generic* is endian-neutral target, but ./config is free to
+ # throw in -D[BL]_ENDIAN, whichever appropriate...
+-"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+-"linux-ppc",	"gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-generic32","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
++"linux-ppc",	"gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
+ # It's believed that majority of ARM toolchains predefine appropriate -march.
+ # If you compiler does not, do complement config command line with one!
+-"linux-armv4",	"gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-armv4",	"gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
+ #### IA-32 targets...
+ "linux-ia32-icc",	"icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+-"linux-elf",	"gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-elf",	"gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
+ "linux-aout",	"gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -march=i486 -Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:a.out",
+ ####
+-"linux-generic64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+-"linux-ppc64",	"gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
+-"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:$ppc64_asm:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
+-"linux-ia64",	"gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-generic64","gcc:-DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
++"linux-ppc64",	"gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
++"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc64_asm}:linux64le:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
++"linux-ia64",	"gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
+ "linux-ia64-ecc","ecc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+ "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall -no_cpprt::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+-"linux-x86_64",	"gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
+-"linux64-s390x",	"gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
++"linux-x86_64",	"gcc:-m64 -DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
++"linux64-s390x",	"gcc:-m64 -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
+ #### So called "highgprs" target for z/Architecture CPUs
+ # "Highgprs" is kernel feature first implemented in Linux 2.6.32, see
+ # /proc/cpuinfo. The idea is to preserve most significant bits of
+@@ -376,16 +376,17 @@ my %table=(
+ # ldconfig and run-time linker to autodiscover. Unfortunately it
+ # doesn't work just yet, because of couple of bugs in glibc
+ # sysdeps/s390/dl-procinfo.c affecting ldconfig and ld.so.1...
+-"linux32-s390x",	"gcc:-m31 -Wa,-mzarch -DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$s390x_asm;$asm=~s/bn\-s390x\.o/bn_asm.o/;$asm}.":31:dlfcn:linux-shared:-fPIC:-m31:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/highgprs",
++"linux32-s390x",	"gcc:-m31 -Wa,-mzarch -DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:".eval{my $asm=$s390x_asm;$asm=~s/bn\-s390x\.o/bn_asm.o/;$asm}.":31:dlfcn:linux-shared:-fPIC:-m31 \$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::/highgprs",
+ #### SPARC Linux setups
+ # Ray Miller <ray.miller@computing-services.oxford.ac.uk> has patiently
+ # assisted with debugging of following two configs.
+-"linux-sparcv8","gcc:-mv8 -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -DBN_DIV2W::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-sparcv8","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS) -DBN_DIV2W::-D_REENTRANT::-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
+ # it's a real mess with -mcpu=ultrasparc option under Linux, but
+ # -Wa,-Av8plus should do the trick no matter what.
+-"linux-sparcv9","gcc:-m32 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall -Wa,-Av8plus -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-sparcv9","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS) -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
+ # GCC 3.1 is a requirement
+-"linux64-sparcv9","gcc:-m64 -mcpu=ultrasparc -DB_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
++"linux64-sparcv9","gcc:-DB_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT:ULTRASPARC:-Wl,-z,relro -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
++"linux-aarch64","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER):::64",
+ #### Alpha Linux with GNU C and Compaq C setups
+ # Special notes:
+ # - linux-alpha+bwx-gcc is ment to be used from ./config only. If you
+@@ -399,8 +400,8 @@ my %table=(
+ #
+ #					<appro@fy.chalmers.se>
+ #
+-"linux-alpha-gcc","gcc:-O3 -DL_ENDIAN -DTERMIO::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+-"linux-alpha+bwx-gcc","gcc:-O3 -DL_ENDIAN -DTERMIO::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
++"linux-alpha-gcc","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
++"linux-alpha+bwx-gcc","gcc:-DL_ENDIAN -DTERMIO -Wall \$(RPM_OPT_FLAGS)::-D_REENTRANT::-Wl,-z,relro -ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC:\$(RPM_OPT_FLAGS):.so.\$(SHLIB_SONAMEVER)",
+ "linux-alpha-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
+ "linux-alpha+bwx-ccc","ccc:-fast -readonly_strings -DL_ENDIAN -DTERMIO::-D_REENTRANT:::SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL:${alpha_asm}",
+ 
+@@ -1675,7 +1676,7 @@ while (<IN>)
+ 	elsif ($shared_extension ne "" && $shared_extension =~ /^\.s([ol])\.[^\.]*\.[^\.]*$/)
+ 		{
+ 		my $sotmp = $1;
+-		s/^SHARED_LIBS_LINK_EXTS=.*/SHARED_LIBS_LINK_EXTS=.s$sotmp.\$(SHLIB_MAJOR) .s$sotmp/;
++		s/^SHARED_LIBS_LINK_EXTS=.*/SHARED_LIBS_LINK_EXTS=.s$sotmp.\$(SHLIB_SONAMEVER) .s$sotmp/;
+ 		}
+ 	elsif ($shared_extension ne "" && $shared_extension =~ /^\.[^\.]*\.[^\.]*\.dylib$/)
+ 		{
+diff -up openssl-1.0.1e/Makefile.org.rpmbuild openssl-1.0.1e/Makefile.org
+--- openssl-1.0.1e/Makefile.org.rpmbuild	2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/Makefile.org	2014-08-13 19:19:53.218005759 +0200
+@@ -10,6 +10,7 @@ SHLIB_VERSION_HISTORY=
+ SHLIB_MAJOR=
+ SHLIB_MINOR=
+ SHLIB_EXT=
++SHLIB_SONAMEVER=10
+ PLATFORM=dist
+ OPTIONS=
+ CONFIGURE_ARGS=
+@@ -333,10 +334,9 @@ clean-shared:
+ link-shared:
+ 	@ set -e; for i in $(SHLIBDIRS); do \
+ 		$(MAKE) -f $(HERE)/Makefile.shared -e $(BUILDENV) \
+-			LIBNAME=$$i LIBVERSION=$(SHLIB_MAJOR).$(SHLIB_MINOR) \
++			LIBNAME=$$i LIBVERSION=$(SHLIB_SONAMEVER) \
+ 			LIBCOMPATVERSIONS=";$(SHLIB_VERSION_HISTORY)" \
+ 			symlink.$(SHLIB_TARGET); \
+-		libs="$$libs -l$$i"; \
+ 	done
+ 
+ build-shared: do_$(SHLIB_TARGET) link-shared
+@@ -347,7 +347,7 @@ do_$(SHLIB_TARGET):
+ 			libs="$(LIBKRB5) $$libs"; \
+ 		fi; \
+ 		$(CLEARENV) && $(MAKE) -f Makefile.shared -e $(BUILDENV) \
+-			LIBNAME=$$i LIBVERSION=$(SHLIB_MAJOR).$(SHLIB_MINOR) \
++			LIBNAME=$$i LIBVERSION=$(SHLIB_SONAMEVER) \
+ 			LIBCOMPATVERSIONS=";$(SHLIB_VERSION_HISTORY)" \
+ 			LIBDEPS="$$libs $(EX_LIBS)" \
+ 			link_a.$(SHLIB_TARGET); \
diff --git a/SOURCES/openssl-1.0.1e-sn-case.patch b/SOURCES/openssl-1.0.1e-sn-case.patch
new file mode 100644
index 0000000..eb5955a
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-sn-case.patch
@@ -0,0 +1,12 @@
+diff -up openssl-1.0.1e/apps/s_server.c.sn-case openssl-1.0.1e/apps/s_server.c
+--- openssl-1.0.1e/apps/s_server.c.sn-case	2014-09-17 15:31:51.000000000 +0200
++++ openssl-1.0.1e/apps/s_server.c	2014-09-17 15:43:04.619321492 +0200
+@@ -744,7 +744,7 @@ static int MS_CALLBACK ssl_servername_cb
+ 	
+ 	if (servername)
+ 		{
+-    		if (strcmp(servername,p->servername)) 
++		if (strcasecmp(servername,p->servername)) 
+ 			return p->extension_error;
+ 		if (ctx2)
+ 			{
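The hunk above replaces strcmp() with strcasecmp() when matching the client-supplied SNI servername, because DNS names compare case-insensitively. A minimal standalone C sketch of the same comparison (the hostnames here are illustrative, not taken from the patch):

    #include <stdio.h>
    #include <strings.h>    /* strcasecmp() is declared here (POSIX) */

    int main(void)
    {
        const char *sni   = "Example.COM";  /* name sent by the client */
        const char *vhost = "example.com";  /* name configured on the server */

        /* DNS names are case-insensitive, so SNI matching should be too */
        if (strcasecmp(sni, vhost) == 0)
            printf("servername matches\n");
        else
            printf("servername mismatch\n");
        return 0;
    }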
diff --git a/SOURCES/openssl-1.0.1e-ssl2-no-ec.patch b/SOURCES/openssl-1.0.1e-ssl2-no-ec.patch
new file mode 100644
index 0000000..81ad472
--- /dev/null
+++ b/SOURCES/openssl-1.0.1e-ssl2-no-ec.patch
@@ -0,0 +1,17 @@
+diff -up openssl-1.0.1e/ssl/s23_lib.c.ssl2noec openssl-1.0.1e/ssl/s23_lib.c
+--- openssl-1.0.1e/ssl/s23_lib.c.ssl2noec	2013-02-11 16:26:04.000000000 +0100
++++ openssl-1.0.1e/ssl/s23_lib.c	2014-05-06 15:51:54.053293674 +0200
+@@ -107,6 +107,13 @@ int ssl23_put_cipher_by_char(const SSL_C
+ 	long l;
+ 
+ 	/* We can write SSLv2 and SSLv3 ciphers */
++	/* but no ECC ciphers */
++	if (c->algorithm_mkey == SSL_kECDHr ||
++		c->algorithm_mkey == SSL_kECDHe ||
++		c->algorithm_mkey == SSL_kEECDH ||
++		c->algorithm_auth == SSL_aECDH ||
++		c->algorithm_auth == SSL_aECDSA)
++		return 0;
+ 	if (p != NULL)
+ 		{
+ 		l=c->id;
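The check added above filters ECC ciphersuites out of ssl23_put_cipher_by_char(), since SSLv2-format client hellos have no encoding for ECC key exchange or authentication. Read as a standalone predicate, it amounts to the following hedged C sketch (the SSL_CIPHER fields and SSL_k*/SSL_a* constants are the ones used in the hunk; the helper name is made up):

    /* Nonzero if the cipher may be advertised in an SSLv2-compatible
     * hello; ECC key exchange/authentication is rejected, mirroring
     * the hunk above. */
    static int ssl2_compatible_cipher(const SSL_CIPHER *c)
    {
        if (c->algorithm_mkey == SSL_kECDHr ||
            c->algorithm_mkey == SSL_kECDHe ||
            c->algorithm_mkey == SSL_kEECDH ||
            c->algorithm_auth == SSL_aECDH ||
            c->algorithm_auth == SSL_aECDSA)
            return 0;
        return 1;
    }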
diff --git a/SOURCES/opensslconf-new.h b/SOURCES/opensslconf-new.h
index cf22738..bd56c73 100644
--- a/SOURCES/opensslconf-new.h
+++ b/SOURCES/opensslconf-new.h
@@ -14,7 +14,12 @@
 #elif defined(__ia64__)
 #include "opensslconf-ia64.h"
 #elif defined(__powerpc64__)
+#include <endian.h>
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 #include "opensslconf-ppc64.h"
+#else
+#include "opensslconf-ppc64le.h"
+#endif
 #elif defined(__powerpc__)
 #include "opensslconf-ppc.h"
 #elif defined(__s390x__)
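The opensslconf-new.h change above selects between the big- and little-endian ppc64 configuration headers at preprocessing time, keying off the compiler-predefined __BYTE_ORDER__ and __ORDER_BIG_ENDIAN__ macros (GCC and Clang define both). A self-contained sketch of the same dispatch idiom (the printed strings are illustrative only):

    #include <stdio.h>

    int main(void)
    {
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        /* a big-endian ppc64 build would include opensslconf-ppc64.h */
        printf("big-endian target\n");
    #else
        /* a little-endian build would include opensslconf-ppc64le.h */
        printf("little-endian target\n");
    #endif
        return 0;
    }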
diff --git a/SPECS/openssl.spec b/SPECS/openssl.spec
index b01854b..4900dd3 100644
--- a/SPECS/openssl.spec
+++ b/SPECS/openssl.spec
@@ -23,7 +23,7 @@
 Summary: Utilities from the general purpose cryptography library with TLS implementation
 Name: openssl
 Version: 1.0.1e
-Release: 34%{?dist}.7
+Release: 42%{?dist}
 Epoch: 1
 # We have to remove certain patented algorithms from the openssl source
 # tarball with the hobble-openssl script which is included below.
@@ -40,7 +40,7 @@ Source11: README.FIPS
 Source12: ec_curve.c
 Source13: ectest.c
 # Build changes
-Patch1: openssl-1.0.1-beta2-rpmbuild.patch
+Patch1: openssl-1.0.1e-rpmbuild.patch
 Patch2: openssl-1.0.1e-defaults.patch
 Patch4: openssl-1.0.0-beta5-enginesdir.patch
 Patch5: openssl-0.9.8a-no-rpath.patch
@@ -48,6 +48,8 @@ Patch6: openssl-0.9.8b-test-use-localhost.patch
 Patch7: openssl-1.0.0-timezone.patch
 Patch8: openssl-1.0.1c-perlfind.patch
 Patch9: openssl-1.0.1c-aliasing.patch
+# This patch must be applied first
+Patch10: openssl-1.0.1e-ppc-asm-update.patch
 # Bug fixes
 Patch23: openssl-1.0.1c-default-paths.patch
 Patch24: openssl-1.0.1e-issuer-hash.patch
@@ -82,7 +84,10 @@ Patch76: openssl-1.0.1e-new-fips-reqs.patch
 Patch77: openssl-1.0.1e-weak-ciphers.patch
 Patch78: openssl-1.0.1e-3des-strength.patch
 Patch79: openssl-1.0.1e-req-keylen.patch
+Patch41: openssl-1.0.1e-ssl2-no-ec.patch
+Patch42: openssl-1.0.1e-enc-fail.patch
 # Backported fixes including security fixes
+Patch80: openssl-1.0.1e-evp-wrap.patch
 Patch81: openssl-1.0.1-beta2-padlock64.patch
 Patch82: openssl-1.0.1e-backports.patch
 Patch83: openssl-1.0.1e-bad-mac.patch
@@ -99,6 +104,7 @@ Patch93: openssl-1.0.1e-cve-2014-0198.patch
 Patch94: openssl-1.0.1e-cve-2014-0221.patch
 Patch95: openssl-1.0.1e-cve-2014-0224.patch
 Patch96: openssl-1.0.1e-cve-2014-3470.patch
+Patch97: openssl-1.0.1e-dtls-ecc-ext.patch
 Patch100: openssl-1.0.1e-cve-2014-3505.patch
 Patch101: openssl-1.0.1e-cve-2014-3506.patch
 Patch102: openssl-1.0.1e-cve-2014-3507.patch
@@ -106,9 +112,13 @@ Patch103: openssl-1.0.1e-cve-2014-3508.patch
 Patch104: openssl-1.0.1e-cve-2014-3509.patch
 Patch105: openssl-1.0.1e-cve-2014-3510.patch
 Patch106: openssl-1.0.1e-cve-2014-3511.patch
+Patch107: openssl-1.0.1e-doc-ciphersuites.patch
+Patch108: openssl-1.0.1e-sn-case.patch
+Patch109: openssl-1.0.1e-ecdh-auto.patch
 Patch110: openssl-1.0.1e-cve-2014-3567.patch
 Patch111: openssl-1.0.1e-cve-2014-3513.patch
 Patch112: openssl-1.0.1e-fallback-scsv.patch
+Patch113: openssl-1.0.1e-copy-algo.patch
 Patch114: openssl-1.0.1e-cve-2014-3570.patch
 Patch115: openssl-1.0.1e-cve-2014-3571.patch
 Patch116: openssl-1.0.1e-cve-2014-3572.patch
@@ -116,6 +126,7 @@ Patch117: openssl-1.0.1e-cve-2014-8275.patch
 Patch118: openssl-1.0.1e-cve-2015-0204.patch
 Patch119: openssl-1.0.1e-cve-2015-0205.patch
 Patch120: openssl-1.0.1e-cve-2015-0206.patch
+Patch121: openssl-1.0.1e-cc-reqs.patch
 
 License: OpenSSL
 Group: System Environment/Libraries
@@ -187,6 +198,7 @@ from other formats to the formats used by the OpenSSL toolkit.
 
 cp %{SOURCE12} %{SOURCE13} crypto/ec/
 
+%patch10 -p1 -b .ppc-asm
 %patch1 -p1 -b .rpmbuild
 %patch2 -p1 -b .defaults
 %patch4 -p1 -b .enginesdir %{?_rawbuild}
@@ -228,7 +240,10 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch77 -p1 -b .weak-ciphers
 %patch78 -p1 -b .3des-strength
 %patch79 -p1 -b .keylen
+%patch41 -p1 -b .ssl2-noec
+%patch42 -p1 -b .enc-fail
 
+%patch80 -p1 -b .wrap
 %patch81 -p1 -b .padlock64
 %patch82 -p1 -b .backports
 %patch71 -p1 -b .manfix
@@ -246,6 +261,7 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch94 -p1 -b .dtls1-dos
 %patch95 -p1 -b .keying-mitm
 %patch96 -p1 -b .anon-ecdh-dos
+%patch97 -p1 -b .dtls-ecc-ext
 %patch100 -p1 -b .dtls-doublefree
 %patch101 -p1 -b .dtls-sizechecks
 %patch102 -p1 -b .dtls-memleak
@@ -253,9 +269,13 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch104 -p1 -b .tlsext-race
 %patch105 -p1 -b .adh-dos
 %patch106 -p1 -b .frag-downgrade
+%patch107 -p1 -b .doc-ciphersuites
+%patch108 -p1 -b .sn-case
+%patch109 -p1 -b .ecdh-auto
 %patch110 -p1 -b .ticket-leak
 %patch111 -p1 -b .srtp-leak
 %patch112 -p1 -b .fallback-scsv
+%patch113 -p1 -b .copy-algo
 %patch114 -p1 -b .bn-sqr
 %patch115 -p1 -b .dtls1-reads
 %patch116 -p1 -b .ecdh-downgrade
@@ -263,6 +283,7 @@ cp %{SOURCE12} %{SOURCE13} crypto/ec/
 %patch118 -p1 -b .rsa-ephemeral
 %patch119 -p1 -b .dh-unauthenticated
 %patch120 -p1 -b .dtls-rec-leak
+%patch121 -p1 -b .cc-reqs
 
 sed -i 's/SHLIB_VERSION_NUMBER "1.0.0"/SHLIB_VERSION_NUMBER "%{version}"/' crypto/opensslv.h
 
@@ -306,9 +327,12 @@ sslarch=linux-armv4
 %ifarch sh3 sh4
 sslarch=linux-generic32
 %endif
-%ifarch %{power64}
+%ifarch ppc64 ppc64p7
 sslarch=linux-ppc64
 %endif
+%ifarch ppc64le
+sslarch="linux-ppc64le"
+%endif
 
 # ia64, x86_64, ppc are OK by default
 # Configure the build tree.  Override OpenSSL defaults with known-good defaults
@@ -526,7 +550,11 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/fipscanister.*
 %postun libs -p /sbin/ldconfig
 
 %changelog
-* Tue Jan 13 2015 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-34.7
+* Thu Jan 15 2015 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-42
+- check in the non-FIPS RSA keygen for a minimal distance between p and q,
+  similarly to the FIPS RSA keygen
+
+* Tue Jan 13 2015 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-41
 - fix CVE-2014-3570 - incorrect computation in BN_sqr()
 - fix CVE-2014-3571 - possible crash in dtls1_get_record()
 - fix CVE-2014-3572 - possible downgrade of ECDH ciphersuite to non-PFS state
@@ -536,13 +564,38 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/fipscanister.*
 - fix CVE-2015-0205 - do not allow unauthenticated client DH certificate
 - fix CVE-2015-0206 - possible memory leak when buffering DTLS records
 
-* Wed Oct 15 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-34.6
+* Tue Oct 21 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-40
+- use FIPS approved method for computation of d in RSA
+- copy digest algorithm when handling SNI context switch
+
+* Wed Oct 15 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-39
 - fix CVE-2014-3567 - memory leak when handling session tickets
 - fix CVE-2014-3513 - memory leak in srtp support
 - add support for fallback SCSV to partially mitigate CVE-2014-3566
   (padding attack on SSL3)
 
-* Fri Aug  8 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-34.4
+* Wed Sep 24 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-38
+- do FIPS algorithm selftest before the integrity check
+
+* Thu Sep 18 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-37
+- add support for RFC 5649 (#1119738)
+- do not pass the FIPS integrity check if the .hmac files are empty (#1128849)
+- add ECC TLS extensions to DTLS (#1119803)
+- do not send ECC ciphersuites in SSLv2 client hello (#1090955)
+- properly propagate encryption failure in BIO_f_cipher (#1072439)
+- fix the CVE-2014-0224 fix that broke EAP-FAST session resumption support
+- improve documentation of ciphersuites - patch by Hubert Kario (#1108026)
+- use case insensitive comparison for servername in s_server (#1081163)
+- add support for automatic ECDH curve selection on server (#1080128)
+- FIPS mode: enforce the limitations on DSA, DH, and RSA keygen
+  length only if the OPENSSL_ENFORCE_MODULUS_BITS environment
+  variable is set
+
+* Wed Aug 13 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-36
+- add support for ppc64le architecture
+- add POWER8 optimizations
+
+* Fri Aug  8 2014 Tomáš Mráz <tmraz@redhat.com> 1.0.1e-35
 - fix CVE-2014-3505 - doublefree in DTLS packet processing
 - fix CVE-2014-3506 - avoid memory exhaustion in DTLS
 - fix CVE-2014-3507 - avoid memory leak in DTLS