|
|
4cad4c |
From 696d56fc75e72f47e4d3232a2140fac10b6b44de Mon Sep 17 00:00:00 2001
|
|
|
4cad4c |
From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= <zbyszek@in.waw.pl>
|
|
|
4cad4c |
Date: Mon, 29 Oct 2018 14:55:33 +0100
|
|
|
4cad4c |
Subject: [PATCH] journal: adapt for new improved LZ4_decompress_safe_partial()
|
|
|
4cad4c |
MIME-Version: 1.0
|
|
|
4cad4c |
Content-Type: text/plain; charset=UTF-8
|
|
|
4cad4c |
Content-Transfer-Encoding: 8bit
|
|
|
4cad4c |
|
|
|
4cad4c |
With lz4 1.8.3, this function can now decompress partial results into a smaller
|
|
|
4cad4c |
buffer. The release news don't say anything interesting, but the test case that
|
|
|
4cad4c |
was previously failing now works OK.
|
|
|
4cad4c |
|
|
|
4cad4c |
Fixes #10259.
|
|
|
4cad4c |
|
|
|
4cad4c |
A test is added. It shows that with *older* lz4, a partial decompression can
|
|
|
4cad4c |
occur with the returned size smaller than the requested number of bytes _and_
|
|
|
4cad4c |
smaller than the size of the compressed data:
|
|
|
4cad4c |
|
|
|
4cad4c |
(lz4-libs-1.8.2-1.fc29.x86_64)
|
|
|
4cad4c |
Compressed 4194304 → 16464
|
|
|
4cad4c |
Decompressed → 4194304
|
|
|
4cad4c |
Decompressed partial 12/4194304 → 4194304
|
|
|
4cad4c |
Decompressed partial 1/1 → -2 (bad)
|
|
|
4cad4c |
Decompressed partial 2/2 → -2 (bad)
|
|
|
4cad4c |
Decompressed partial 3/3 → -2 (bad)
|
|
|
4cad4c |
Decompressed partial 4/4 → -2 (bad)
|
|
|
4cad4c |
Decompressed partial 5/5 → -2 (bad)
|
|
|
4cad4c |
Decompressed partial 6/6 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 7/7 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 8/8 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 9/9 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 10/10 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 11/11 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 12/12 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 13/13 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 14/14 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 15/15 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 16/16 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 17/17 → 6 (good)
|
|
|
4cad4c |
Decompressed partial 18/18 → -16459 (bad)
|
|
|
4cad4c |
|
|
|
4cad4c |
(lz4-libs-1.8.3-1.fc29.x86_64)
|
|
|
4cad4c |
Compressed 4194304 → 16464
|
|
|
4cad4c |
Decompressed → 4194304
|
|
|
4cad4c |
Decompressed partial 12/4194304 → 12
|
|
|
4cad4c |
Decompressed partial 1/1 → 1 (good)
|
|
|
4cad4c |
Decompressed partial 2/2 → 2 (good)
|
|
|
4cad4c |
Decompressed partial 3/3 → 3 (good)
|
|
|
4cad4c |
Decompressed partial 4/4 → 4 (good)
|
|
|
4cad4c |
...
|
|
|
4cad4c |
|
|
|
4cad4c |
If we got such a short "successful" decompression in decompress_startswith() as
|
|
|
4cad4c |
implemented before this patch, we could be confused and return a false negative
|
|
|
4cad4c |
result. But it turns out that this only occurs with small output buffer
|
|
|
4cad4c |
sizes. We use greedy_realloc() to manage the buffer, so it is always at least
|
|
|
4cad4c |
64 bytes. I couldn't hit a case where decompress_startswith() would actually
|
|
|
4cad4c |
return a bogus result. But since the lack of proof is not conclusive, the code
|
|
|
4cad4c |
for *older* lz4 is changed too, just to be safe. We cannot rule out that on a
|
|
|
4cad4c |
different architecture or with some unlucky compressed string we could hit this
|
|
|
4cad4c |
corner case.
|
|
|
4cad4c |
|
|
|
4cad4c |
The fallback code is guarded by a version check. The check uses a function not
|
|
|
4cad4c |
the compile-time define, because there was no soversion bump in lz4 or new
|
|
|
4cad4c |
symbols, and we could be compiled against a newer lz4 and linked at runtime
|
|
|
4cad4c |
with an older one. (This happens routinely e.g. when somebody upgrades a subset
|
|
|
4cad4c |
of distro packages.)
|
|
|
4cad4c |
|
|
|
4cad4c |
(cherry picked from commit e41ef6fd0027d3619dc1cf062100b2d224d0ee7e)
|
|
|
4cad4c |
Resolves: #1843871
|
|
|
4cad4c |
---
|
|
|
4cad4c |
src/journal/compress.c | 39 ++++++++++++++++++++++++-------------
|
|
|
4cad4c |
src/journal/test-compress.c | 21 ++++++++++----------
|
|
|
4cad4c |
2 files changed, 37 insertions(+), 23 deletions(-)
|
|
|
4cad4c |
|
|
|
4cad4c |
diff --git a/src/journal/compress.c b/src/journal/compress.c
|
|
|
4cad4c |
index a4a5e63840..e95ce2bcaa 100644
|
|
|
4cad4c |
--- a/src/journal/compress.c
|
|
|
4cad4c |
+++ b/src/journal/compress.c
|
|
|
4cad4c |
@@ -290,7 +290,6 @@ int decompress_startswith_lz4(const void *src, uint64_t src_size,
|
|
|
4cad4c |
* prefix */
|
|
|
4cad4c |
|
|
|
4cad4c |
int r;
|
|
|
4cad4c |
- size_t size;
|
|
|
4cad4c |
|
|
|
4cad4c |
assert(src);
|
|
|
4cad4c |
assert(src_size > 0);
|
|
|
4cad4c |
@@ -307,23 +306,37 @@ int decompress_startswith_lz4(const void *src, uint64_t src_size,
|
|
|
4cad4c |
|
|
|
4cad4c |
r = LZ4_decompress_safe_partial((char*)src + 8, *buffer, src_size - 8,
|
|
|
4cad4c |
prefix_len + 1, *buffer_size);
|
|
|
4cad4c |
- if (r >= 0)
|
|
|
4cad4c |
- size = (unsigned) r;
|
|
|
4cad4c |
- else {
|
|
|
4cad4c |
- /* lz4 always tries to decode full "sequence", so in
|
|
|
4cad4c |
- * pathological cases might need to decompress the
|
|
|
4cad4c |
- * full field. */
|
|
|
4cad4c |
+ /* On lz4 < 1.8.3, we might get "failure" (r < 0), or "success" where
|
|
|
4cad4c |
+ * just a part of the buffer is decompressed. But if we get a smaller
|
|
|
4cad4c |
+ * amount of bytes than requested, we don't know whether there isn't enough
|
|
|
4cad4c |
+ * data to fill the requested size or whether we just got a partial answer.
|
|
|
4cad4c |
+ */
|
|
|
4cad4c |
+ if (r < 0 || (size_t) r < prefix_len + 1) {
|
|
|
4cad4c |
+ size_t size;
|
|
|
4cad4c |
+
|
|
|
4cad4c |
+ if (LZ4_versionNumber() >= 10803)
|
|
|
4cad4c |
+ /* We trust that the newer lz4 decompresses the number of bytes we
|
|
|
4cad4c |
+ * requested if available in the compressed string. */
|
|
|
4cad4c |
+ return 0;
|
|
|
4cad4c |
+
|
|
|
4cad4c |
+ if (r > 0)
|
|
|
4cad4c |
+ /* Compare what we have first, in case of mismatch we can
|
|
|
4cad4c |
+ * shortcut the full comparison. */
|
|
|
4cad4c |
+ if (memcmp(*buffer, prefix, r) != 0)
|
|
|
4cad4c |
+ return 0;
|
|
|
4cad4c |
+
|
|
|
4cad4c |
+ /* Before version 1.8.3, lz4 always tries to decode a full "sequence",
|
|
|
4cad4c |
+ * so in pathological cases we might need to decompress the full field. */
|
|
|
4cad4c |
r = decompress_blob_lz4(src, src_size, buffer, buffer_size, &size, 0);
|
|
|
4cad4c |
if (r < 0)
|
|
|
4cad4c |
return r;
|
|
|
4cad4c |
- }
|
|
|
4cad4c |
|
|
|
4cad4c |
- if (size >= prefix_len + 1)
|
|
|
4cad4c |
- return memcmp(*buffer, prefix, prefix_len) == 0 &&
|
|
|
4cad4c |
- ((const uint8_t*) *buffer)[prefix_len] == extra;
|
|
|
4cad4c |
- else
|
|
|
4cad4c |
- return 0;
|
|
|
4cad4c |
+ if (size < prefix_len + 1)
|
|
|
4cad4c |
+ return 0;
|
|
|
4cad4c |
+ }
|
|
|
4cad4c |
|
|
|
4cad4c |
+ return memcmp(*buffer, prefix, prefix_len) == 0 &&
|
|
|
4cad4c |
+ ((const uint8_t*) *buffer)[prefix_len] == extra;
|
|
|
4cad4c |
#else
|
|
|
4cad4c |
return -EPROTONOSUPPORT;
|
|
|
4cad4c |
#endif
|
|
|
4cad4c |
diff --git a/src/journal/test-compress.c b/src/journal/test-compress.c
|
|
|
4cad4c |
index 65cd3fbfeb..f60c4ae3d7 100644
|
|
|
4cad4c |
--- a/src/journal/test-compress.c
|
|
|
4cad4c |
+++ b/src/journal/test-compress.c
|
|
|
4cad4c |
@@ -223,13 +223,13 @@ static void test_compress_stream(int compression,
|
|
|
4cad4c |
|
|
|
4cad4c |
#if HAVE_LZ4
|
|
|
4cad4c |
static void test_lz4_decompress_partial(void) {
|
|
|
4cad4c |
- char buf[20000];
|
|
|
4cad4c |
+ char buf[20000], buf2[100];
|
|
|
4cad4c |
size_t buf_size = sizeof(buf), compressed;
|
|
|
4cad4c |
int r;
|
|
|
4cad4c |
_cleanup_free_ char *huge = NULL;
|
|
|
4cad4c |
|
|
|
4cad4c |
#define HUGE_SIZE (4096*1024)
|
|
|
4cad4c |
- huge = malloc(HUGE_SIZE);
|
|
|
4cad4c |
+ assert_se(huge = malloc(HUGE_SIZE));
|
|
|
4cad4c |
memset(huge, 'x', HUGE_SIZE);
|
|
|
4cad4c |
memcpy(huge, "HUGE=", 5);
|
|
|
4cad4c |
|
|
|
4cad4c |
@@ -248,14 +248,15 @@ static void test_lz4_decompress_partial(void) {
|
|
|
4cad4c |
assert_se(r >= 0);
|
|
|
4cad4c |
log_info("Decompressed partial %i/%i → %i", 12, HUGE_SIZE, r);
|
|
|
4cad4c |
|
|
|
4cad4c |
- /* We expect this to fail, because that's how current lz4 works. If this
|
|
|
4cad4c |
- * call succeeds, then lz4 has been fixed, and we need to change our code.
|
|
|
4cad4c |
- */
|
|
|
4cad4c |
- r = LZ4_decompress_safe_partial(buf, huge,
|
|
|
4cad4c |
- compressed,
|
|
|
4cad4c |
- 12, HUGE_SIZE-1);
|
|
|
4cad4c |
- assert_se(r < 0);
|
|
|
4cad4c |
- log_info("Decompressed partial %i/%i → %i", 12, HUGE_SIZE-1, r);
|
|
|
4cad4c |
+ for (size_t size = 1; size < sizeof(buf2); size++) {
|
|
|
4cad4c |
+ /* This failed in older lz4s but works in newer ones. */
|
|
|
4cad4c |
+ r = LZ4_decompress_safe_partial(buf, buf2, compressed, size, size);
|
|
|
4cad4c |
+ log_info("Decompressed partial %zu/%zu → %i (%s)", size, size, r,
|
|
|
4cad4c |
+ r < 0 ? "bad" : "good");
|
|
|
4cad4c |
+ if (r >= 0 && LZ4_versionNumber() >= 10803)
|
|
|
4cad4c |
+ /* lz4 <= 1.8.2 should fail that test, let's only check for newer ones */
|
|
|
4cad4c |
+ assert_se(memcmp(buf2, huge, r) == 0);
|
|
|
4cad4c |
+ }
|
|
|
4cad4c |
}
|
|
|
4cad4c |
#endif
|
|
|
4cad4c |
|