|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
From 00464ad8a698fe7735737fab57420f8a44013890 Mon Sep 17 00:00:00 2001
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
From: Jon Severinsson <jon@severinsson.net>
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
Date: Tue, 8 Jul 2014 18:29:46 +0200
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
Subject: [PATCH] journal/compress: improve xz compression performance
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
The new lzma2 compression options at the top of compress_blob_xz are
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
equivalent to using preset "0", except for using a 1 MiB dictionary
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
(the same as preset "1"). This makes the memory usage at most 7.5 MiB
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
in the compressor, and 1 MiB in the decompressor, instead of the
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
previous 92 MiB in the compressor and 8 MiB in the decompressor.
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
According to test-compress-benchmark this commit makes XZ compression
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
20 times faster, with no increase in compressed data size.
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
Using more realistic test data (an ELF binary rather than repeating
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
ASCII letters 'a' through 'z' in order) it only provides a factor 10
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
speedup, and at the cost of a 10% increase in compressed data size.
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
But that is still a worthwhile trade-off.
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
According to test-compress-benchmark XZ compression is still 25 times
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
slower than LZ4, but the compressed data is one eighth the size.
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
Using more realistic test data XZ compression is only 18 times slower
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
than LZ4, and the compressed data is only one quarter the size.
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
$ ./test-compress-benchmark
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
XZ: compressed & decompressed 2535300963 bytes in 42.30s (57.15MiB/s), mean compresion 99.95%, skipped 3570 bytes
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
LZ4: compressed & decompressed 2535303543 bytes in 1.60s (1510.60MiB/s), mean compresion 99.60%, skipped 990 bytes
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
(cherry picked from commit 1930eed2a7855d2df06ccf51f9e394428bf547e2)
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
Conflicts:
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
src/journal/compress.c
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
---
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
src/journal/compress.c | 14 ++++++++++++--
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
1 file changed, 12 insertions(+), 2 deletions(-)
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
diff --git a/src/journal/compress.c b/src/journal/compress.c
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
index 1fc62ead2a..9c0b74c455 100644
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
--- a/src/journal/compress.c
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+++ b/src/journal/compress.c
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
@@ -30,6 +30,13 @@
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
#include "util.h"
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
bool compress_blob(const void *src, uint64_t src_size, void *dst, uint64_t *dst_size) {
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ static const lzma_options_lzma opt = {
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ 1u << 20u, NULL, 0, LZMA_LC_DEFAULT, LZMA_LP_DEFAULT,
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ LZMA_PB_DEFAULT, LZMA_MODE_FAST, 128, LZMA_MF_HC3, 4};
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ static const lzma_filter filters[2] = {
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ {LZMA_FILTER_LZMA2, (lzma_options_lzma*) &opt},
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ {LZMA_VLI_UNKNOWN, NULL}
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ };
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
lzma_ret ret;
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
size_t out_pos = 0;
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
@@ -41,8 +48,11 @@ bool compress_blob(const void *src, uint64_t src_size, void *dst, uint64_t *dst_
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
/* Returns false if we couldn't compress the data or the
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
* compressed result is longer than the original */
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
- ret = lzma_easy_buffer_encode(LZMA_PRESET_DEFAULT, LZMA_CHECK_NONE, NULL,
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
- src, src_size, dst, &out_pos, src_size);
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ if (src_size < 80)
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ return -ENOBUFS;
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ ret = lzma_stream_buffer_encode((lzma_filter*) filters, LZMA_CHECK_NONE, NULL,
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
+ src, src_size, dst, &out_pos, src_size - 1);
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
if (ret != LZMA_OK)
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
return false;
|
|
Zbigniew Jędrzejewski-Szmek |
43ff24 |
|