| From 457fd5fdb8a089eec09fc21e06ce3099633a248a Mon Sep 17 00:00:00 2001 |
| From: Jeffrey Cody <jcody@redhat.com> |
| Date: Wed, 20 Nov 2013 19:43:57 +0100 |
| Subject: [PATCH 14/25] block: vhdx - log parsing, replay, and flush support |
| |
| RH-Author: Jeffrey Cody <jcody@redhat.com> |
| Message-id: <a64a5d0ec84826a20a9fb23dce611793afd19cd5.1384975172.git.jcody@redhat.com> |
| Patchwork-id: 55807 |
| O-Subject: [RHEL7 qemu-kvm PATCH 14/26] block: vhdx - log parsing, replay, and flush support |
| Bugzilla: 879234 |
| RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com> |
| RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com> |
| RH-Acked-by: Fam Zheng <famz@redhat.com> |
| |
| This adds support for VHDX v0 logs, as specified in Microsoft's |
| VHDX Specification Format v1.00: |
| https://www.microsoft.com/en-us/download/details.aspx?id=34750 |
| |
| The following support is added: |
| |
| * Log parsing, and validation - validate that an existing log |
| is correct. |
| |
| * Log search - search through an existing log, to find any valid |
| sequence of entries. |
| |
| * Log replay and flush - replay an existing log, and flush/clear |
| the log when complete. |
| |
| The VHDX log is a circular buffer, with elements (sectors) of 4KB. |
| |
| A log entry is a variably-length number of sectors, that is |
| comprised of a header and 'descriptors', that describe each sector. |
| |
| A log may contain multiple entries, know as a log sequence. In a log |
| sequence, each log entry immediately follows the previous entry, with an |
| incrementing sequence number. There can only ever be one active and |
| valid sequence in the log. |
| |
| Each log entry must match the file log GUID in order to be valid (along |
| with other criteria). Once we have flushed all valid log entries, we |
| marked the file log GUID to be zero, which indicates a buffer with no |
| valid entries. |
| |
| Signed-off-by: Jeff Cody <jcody@redhat.com> |
| Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
| (cherry picked from commit 0a43a1b5d7c33208120eeb2d98ebb9ab15dc2c87) |
| |
| block/Makefile.objs | 2 +- |
| block/vhdx-log.c | 728 ++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| block/vhdx.c | 65 +---- |
| block/vhdx.h | 7 +- |
| 4 files changed, 749 insertions(+), 53 deletions(-) |
| create mode 100644 block/vhdx-log.c |
| |
| Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com> |
| |
| block/Makefile.objs | 2 +- |
| block/vhdx-log.c | 728 +++++++++++++++++++++++++++++++++++++++++++++++++++ |
| block/vhdx.c | 65 +---- |
| block/vhdx.h | 7 +- |
| 4 files changed, 749 insertions(+), 53 deletions(-) |
| create mode 100644 block/vhdx-log.c |
| |
| diff --git a/block/Makefile.objs b/block/Makefile.objs |
| index a7b3b87..84dd57f 100644 |
| |
| |
| @@ -2,7 +2,7 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat |
| block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o |
| block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o |
| block-obj-y += qed-check.o |
| -block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o |
| +block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o |
| block-obj-y += parallels.o blkdebug.o blkverify.o |
| block-obj-y += snapshot.o qapi.o |
| block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o |
| diff --git a/block/vhdx-log.c b/block/vhdx-log.c |
| new file mode 100644 |
| index 0000000..0284729 |
| |
| |
| @@ -0,0 +1,728 @@ |
| +/* |
| + * Block driver for Hyper-V VHDX Images |
| + * |
| + * Copyright (c) 2013 Red Hat, Inc., |
| + * |
| + * Authors: |
| + * Jeff Cody <jcody@redhat.com> |
| + * |
| + * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 |
| + * by Microsoft: |
| + * https://www.microsoft.com/en-us/download/details.aspx?id=34750 |
| + * |
| + * This file covers the functionality of the metadata log writing, parsing, and |
| + * replay. |
| + * |
| + * This work is licensed under the terms of the GNU LGPL, version 2 or later. |
| + * See the COPYING.LIB file in the top-level directory. |
| + * |
| + */ |
| +#include "qemu-common.h" |
| +#include "block/block_int.h" |
| +#include "qemu/module.h" |
| +#include "block/vhdx.h" |
| + |
| + |
| +typedef struct VHDXLogSequence { |
| + bool valid; |
| + uint32_t count; |
| + VHDXLogEntries log; |
| + VHDXLogEntryHeader hdr; |
| +} VHDXLogSequence; |
| + |
| +typedef struct VHDXLogDescEntries { |
| + VHDXLogEntryHeader hdr; |
| + VHDXLogDescriptor desc[]; |
| +} VHDXLogDescEntries; |
| + |
| +static const MSGUID zero_guid = { 0 }; |
| + |
| +/* The log located on the disk is circular buffer containing |
| + * sectors of 4096 bytes each. |
| + * |
| + * It is assumed for the read/write functions below that the |
| + * circular buffer scheme uses a 'one sector open' to indicate |
| + * the buffer is full. Given the validation methods used for each |
| + * sector, this method should be compatible with other methods that |
| + * do not waste a sector. |
| + */ |
| + |
| + |
| +/* Allow peeking at the hdr entry at the beginning of the current |
| + * read index, without advancing the read index */ |
| +static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log, |
| + VHDXLogEntryHeader *hdr) |
| +{ |
| + int ret = 0; |
| + uint64_t offset; |
| + uint32_t read; |
| + |
| + assert(hdr != NULL); |
| + |
| + /* peek is only supported on sector boundaries */ |
| + if (log->read % VHDX_LOG_SECTOR_SIZE) { |
| + ret = -EFAULT; |
| + goto exit; |
| + } |
| + |
| + read = log->read; |
| + /* we are guaranteed that a) log sectors are 4096 bytes, |
| + * and b) the log length is a multiple of 1MB. So, there |
| + * is always a round number of sectors in the buffer */ |
| + if ((read + sizeof(VHDXLogEntryHeader)) > log->length) { |
| + read = 0; |
| + } |
| + |
| + if (read == log->write) { |
| + ret = -EINVAL; |
| + goto exit; |
| + } |
| + |
| + offset = log->offset + read; |
| + |
| + ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader)); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + |
| +exit: |
| + return ret; |
| +} |
| + |
| +/* Index increment for log, based on sector boundaries */ |
| +static int vhdx_log_inc_idx(uint32_t idx, uint64_t length) |
| +{ |
| + idx += VHDX_LOG_SECTOR_SIZE; |
| + /* we are guaranteed that a) log sectors are 4096 bytes, |
| + * and b) the log length is a multiple of 1MB. So, there |
| + * is always a round number of sectors in the buffer */ |
| + return idx >= length ? 0 : idx; |
| +} |
| + |
| + |
| +/* Reset the log to empty */ |
| +static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s) |
| +{ |
| + MSGUID guid = { 0 }; |
| + s->log.read = s->log.write = 0; |
| + /* a log guid of 0 indicates an empty log to any parser of v0 |
| + * VHDX logs */ |
| + vhdx_update_headers(bs, s, false, &guid); |
| +} |
| + |
| +/* Reads num_sectors from the log (all log sectors are 4096 bytes), |
| + * into buffer 'buffer'. Upon return, *sectors_read will contain |
| + * the number of sectors successfully read. |
| + * |
| + * It is assumed that 'buffer' is already allocated, and of sufficient |
| + * size (i.e. >= 4096*num_sectors). |
| + * |
| + * If 'peek' is true, then the tail (read) pointer for the circular buffer is |
| + * not modified. |
| + * |
| + * 0 is returned on success, -errno otherwise. */ |
| +static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log, |
| + uint32_t *sectors_read, void *buffer, |
| + uint32_t num_sectors, bool peek) |
| +{ |
| + int ret = 0; |
| + uint64_t offset; |
| + uint32_t read; |
| + |
| + read = log->read; |
| + |
| + *sectors_read = 0; |
| + while (num_sectors) { |
| + if (read == log->write) { |
| + /* empty */ |
| + break; |
| + } |
| + offset = log->offset + read; |
| + |
| + ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + read = vhdx_log_inc_idx(read, log->length); |
| + |
| + *sectors_read = *sectors_read + 1; |
| + num_sectors--; |
| + } |
| + |
| +exit: |
| + if (!peek) { |
| + log->read = read; |
| + } |
| + return ret; |
| +} |
| + |
| +/* Validates a log entry header */ |
| +static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr, |
| + BDRVVHDXState *s) |
| +{ |
| + int valid = false; |
| + |
| + if (memcmp(&hdr->signature, "loge", 4)) { |
| + goto exit; |
| + } |
| + |
| + /* if the individual entry length is larger than the whole log |
| + * buffer, that is obviously invalid */ |
| + if (log->length < hdr->entry_length) { |
| + goto exit; |
| + } |
| + |
| + /* length of entire entry must be in units of 4KB (log sector size) */ |
| + if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) { |
| + goto exit; |
| + } |
| + |
| + /* per spec, sequence # must be > 0 */ |
| + if (hdr->sequence_number == 0) { |
| + goto exit; |
| + } |
| + |
| + /* log entries are only valid if they match the file-wide log guid |
| + * found in the active header */ |
| + if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) { |
| + goto exit; |
| + } |
| + |
| + if (hdr->descriptor_count * sizeof(VHDXLogDescriptor) > hdr->entry_length) { |
| + goto exit; |
| + } |
| + |
| + valid = true; |
| + |
| +exit: |
| + return valid; |
| +} |
| + |
| +/* |
| + * Given a log header, this will validate that the descriptors and the |
| + * corresponding data sectors (if applicable) |
| + * |
| + * Validation consists of: |
| + * 1. Making sure the sequence numbers matches the entry header |
| + * 2. Verifying a valid signature ('zero' or 'desc' for descriptors) |
| + * 3. File offset field is a multiple of 4KB |
| + * 4. If a data descriptor, the corresponding data sector |
| + * has its signature ('data') and matching sequence number |
| + * |
| + * @desc: the data buffer containing the descriptor |
| + * @hdr: the log entry header |
| + * |
| + * Returns true if valid |
| + */ |
| +static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc, |
| + VHDXLogEntryHeader *hdr) |
| +{ |
| + bool ret = false; |
| + |
| + if (desc->sequence_number != hdr->sequence_number) { |
| + goto exit; |
| + } |
| + if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) { |
| + goto exit; |
| + } |
| + |
| + if (!memcmp(&desc->signature, "zero", 4)) { |
| + if (desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0) { |
| + /* valid */ |
| + ret = true; |
| + } |
| + } else if (!memcmp(&desc->signature, "desc", 4)) { |
| + /* valid */ |
| + ret = true; |
| + } |
| + |
| +exit: |
| + return ret; |
| +} |
| + |
| + |
| +/* Prior to sector data for a log entry, there is the header |
| + * and the descriptors referenced in the header: |
| + * |
| + * [] = 4KB sector |
| + * |
| + * [ hdr, desc ][ desc ][ ... ][ data ][ ... ] |
| + * |
| + * The first sector in a log entry has a 64 byte header, and |
| + * up to 126 32-byte descriptors. If more descriptors than |
| + * 126 are required, then subsequent sectors can have up to 128 |
| + * descriptors. Each sector is 4KB. Data follows the descriptor |
| + * sectors. |
| + * |
| + * This will return the number of sectors needed to encompass |
| + * the passed number of descriptors in desc_cnt. |
| + * |
| + * This will never return 0, even if desc_cnt is 0. |
| + */ |
| +static int vhdx_compute_desc_sectors(uint32_t desc_cnt) |
| +{ |
| + uint32_t desc_sectors; |
| + |
| + desc_cnt += 2; /* account for header in first sector */ |
| + desc_sectors = desc_cnt / 128; |
| + if (desc_cnt % 128) { |
| + desc_sectors++; |
| + } |
| + |
| + return desc_sectors; |
| +} |
| + |
| + |
| +/* Reads the log header, and subsequent descriptors (if any). This |
| + * will allocate all the space for buffer, which must be NULL when |
| + * passed into this function. Each descriptor will also be validated, |
| + * and error returned if any are invalid. */ |
| +static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, |
| + VHDXLogEntries *log, VHDXLogDescEntries **buffer) |
| +{ |
| + int ret = 0; |
| + uint32_t desc_sectors; |
| + uint32_t sectors_read; |
| + VHDXLogEntryHeader hdr; |
| + VHDXLogDescEntries *desc_entries = NULL; |
| + int i; |
| + |
| + assert(*buffer == NULL); |
| + |
| + ret = vhdx_log_peek_hdr(bs, log, &hdr); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + vhdx_log_entry_hdr_le_import(&hdr); |
| + if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { |
| + ret = -EINVAL; |
| + goto exit; |
| + } |
| + |
| + desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); |
| + desc_entries = qemu_blockalign(bs, desc_sectors * VHDX_LOG_SECTOR_SIZE); |
| + |
| + ret = vhdx_log_read_sectors(bs, log, §ors_read, desc_entries, |
| + desc_sectors, false); |
| + if (ret < 0) { |
| + goto free_and_exit; |
| + } |
| + if (sectors_read != desc_sectors) { |
| + ret = -EINVAL; |
| + goto free_and_exit; |
| + } |
| + |
| + /* put in proper endianness, and validate each desc */ |
| + for (i = 0; i < hdr.descriptor_count; i++) { |
| + vhdx_log_desc_le_import(&desc_entries->desc[i]); |
| + if (vhdx_log_desc_is_valid(&desc_entries->desc[i], &hdr) == false) { |
| + ret = -EINVAL; |
| + goto free_and_exit; |
| + } |
| + } |
| + |
| + *buffer = desc_entries; |
| + goto exit; |
| + |
| +free_and_exit: |
| + qemu_vfree(desc_entries); |
| +exit: |
| + return ret; |
| +} |
| + |
| + |
| +/* Flushes the descriptor described by desc to the VHDX image file. |
| + * If the descriptor is a data descriptor, than 'data' must be non-NULL, |
| + * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be |
| + * written. |
| + * |
| + * Verification is performed to make sure the sequence numbers of a data |
| + * descriptor match the sequence number in the desc. |
| + * |
| + * For a zero descriptor, it may describe multiple sectors to fill with zeroes. |
| + * In this case, it should be noted that zeroes are written to disk, and the |
| + * image file is not extended as a sparse file. */ |
| +static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc, |
| + VHDXLogDataSector *data) |
| +{ |
| + int ret = 0; |
| + uint64_t seq, file_offset; |
| + uint32_t offset = 0; |
| + void *buffer = NULL; |
| + uint64_t count = 1; |
| + int i; |
| + |
| + buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); |
| + |
| + if (!memcmp(&desc->signature, "desc", 4)) { |
| + /* data sector */ |
| + if (data == NULL) { |
| + ret = -EFAULT; |
| + goto exit; |
| + } |
| + |
| + /* The sequence number of the data sector must match that |
| + * in the descriptor */ |
| + seq = data->sequence_high; |
| + seq <<= 32; |
| + seq |= data->sequence_low & 0xffffffff; |
| + |
| + if (seq != desc->sequence_number) { |
| + ret = -EINVAL; |
| + goto exit; |
| + } |
| + |
| + /* Each data sector is in total 4096 bytes, however the first |
| + * 8 bytes, and last 4 bytes, are located in the descriptor */ |
| + memcpy(buffer, &desc->leading_bytes, 8); |
| + offset += 8; |
| + |
| + memcpy(buffer+offset, data->data, 4084); |
| + offset += 4084; |
| + |
| + memcpy(buffer+offset, &desc->trailing_bytes, 4); |
| + |
| + } else if (!memcmp(&desc->signature, "zero", 4)) { |
| + /* write 'count' sectors of sector */ |
| + memset(buffer, 0, VHDX_LOG_SECTOR_SIZE); |
| + count = desc->zero_length / VHDX_LOG_SECTOR_SIZE; |
| + } |
| + |
| + file_offset = desc->file_offset; |
| + |
| + /* count is only > 1 if we are writing zeroes */ |
| + for (i = 0; i < count; i++) { |
| + ret = bdrv_pwrite_sync(bs->file, file_offset, buffer, |
| + VHDX_LOG_SECTOR_SIZE); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + file_offset += VHDX_LOG_SECTOR_SIZE; |
| + } |
| + |
| +exit: |
| + qemu_vfree(buffer); |
| + return ret; |
| +} |
| + |
| +/* Flush the entire log (as described by 'logs') to the VHDX image |
| + * file, and then set the log to 'empty' status once complete. |
| + * |
| + * The log entries should be validate prior to flushing */ |
| +static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, |
| + VHDXLogSequence *logs) |
| +{ |
| + int ret = 0; |
| + int i; |
| + uint32_t cnt, sectors_read; |
| + uint64_t new_file_size; |
| + void *data = NULL; |
| + VHDXLogDescEntries *desc_entries = NULL; |
| + VHDXLogEntryHeader hdr_tmp = { 0 }; |
| + |
| + cnt = logs->count; |
| + |
| + data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); |
| + |
| + ret = vhdx_user_visible_write(bs, s); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + |
| + /* each iteration represents one log sequence, which may span multiple |
| + * sectors */ |
| + while (cnt--) { |
| + ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + /* if the log shows a FlushedFileOffset larger than our current file |
| + * size, then that means the file has been truncated / corrupted, and |
| + * we must refused to open it / use it */ |
| + if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) { |
| + ret = -EINVAL; |
| + goto exit; |
| + } |
| + |
| + ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + |
| + for (i = 0; i < desc_entries->hdr.descriptor_count; i++) { |
| + if (!memcmp(&desc_entries->desc[i].signature, "desc", 4)) { |
| + /* data sector, so read a sector to flush */ |
| + ret = vhdx_log_read_sectors(bs, &logs->log, §ors_read, |
| + data, 1, false); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + if (sectors_read != 1) { |
| + ret = -EINVAL; |
| + goto exit; |
| + } |
| + } |
| + |
| + ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + } |
| + if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) { |
| + new_file_size = desc_entries->hdr.last_file_offset; |
| + if (new_file_size % (1024*1024)) { |
| + /* round up to nearest 1MB boundary */ |
| + new_file_size = ((new_file_size >> 20) + 1) << 20; |
| + bdrv_truncate(bs->file, new_file_size); |
| + } |
| + } |
| + qemu_vfree(desc_entries); |
| + desc_entries = NULL; |
| + } |
| + |
| + bdrv_flush(bs); |
| + /* once the log is fully flushed, indicate that we have an empty log |
| + * now. This also sets the log guid to 0, to indicate an empty log */ |
| + vhdx_log_reset(bs, s); |
| + |
| +exit: |
| + qemu_vfree(data); |
| + qemu_vfree(desc_entries); |
| + return ret; |
| +} |
| + |
| +static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s, |
| + VHDXLogEntries *log, uint64_t seq, |
| + bool *valid, VHDXLogEntryHeader *entry) |
| +{ |
| + int ret = 0; |
| + VHDXLogEntryHeader hdr; |
| + void *buffer = NULL; |
| + uint32_t i, desc_sectors, total_sectors, crc; |
| + uint32_t sectors_read = 0; |
| + VHDXLogDescEntries *desc_buffer = NULL; |
| + |
| + *valid = false; |
| + |
| + ret = vhdx_log_peek_hdr(bs, log, &hdr); |
| + if (ret < 0) { |
| + goto inc_and_exit; |
| + } |
| + |
| + vhdx_log_entry_hdr_le_import(&hdr); |
| + |
| + |
| + if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) { |
| + goto inc_and_exit; |
| + } |
| + |
| + if (seq > 0) { |
| + if (hdr.sequence_number != seq + 1) { |
| + goto inc_and_exit; |
| + } |
| + } |
| + |
| + desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); |
| + |
| + /* Read desc sectors, and calculate log checksum */ |
| + |
| + total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE; |
| + |
| + |
| + /* read_desc() will incrememnt the read idx */ |
| + ret = vhdx_log_read_desc(bs, s, log, &desc_buffer); |
| + if (ret < 0) { |
| + goto free_and_exit; |
| + } |
| + |
| + crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer, |
| + desc_sectors * VHDX_LOG_SECTOR_SIZE, 4); |
| + crc ^= 0xffffffff; |
| + |
| + buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE); |
| + if (total_sectors > desc_sectors) { |
| + for (i = 0; i < total_sectors - desc_sectors; i++) { |
| + sectors_read = 0; |
| + ret = vhdx_log_read_sectors(bs, log, §ors_read, buffer, |
| + 1, false); |
| + if (ret < 0 || sectors_read != 1) { |
| + goto free_and_exit; |
| + } |
| + crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1); |
| + crc ^= 0xffffffff; |
| + } |
| + } |
| + crc ^= 0xffffffff; |
| + if (crc != desc_buffer->hdr.checksum) { |
| + goto free_and_exit; |
| + } |
| + |
| + *valid = true; |
| + *entry = hdr; |
| + goto free_and_exit; |
| + |
| +inc_and_exit: |
| + log->read = vhdx_log_inc_idx(log->read, log->length); |
| + |
| +free_and_exit: |
| + qemu_vfree(buffer); |
| + qemu_vfree(desc_buffer); |
| + return ret; |
| +} |
| + |
| +/* Search through the log circular buffer, and find the valid, active |
| + * log sequence, if any exists |
| + * */ |
| +static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s, |
| + VHDXLogSequence *logs) |
| +{ |
| + int ret = 0; |
| + uint32_t tail; |
| + bool seq_valid = false; |
| + VHDXLogSequence candidate = { 0 }; |
| + VHDXLogEntryHeader hdr = { 0 }; |
| + VHDXLogEntries curr_log; |
| + |
| + memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries)); |
| + curr_log.write = curr_log.length; /* assume log is full */ |
| + curr_log.read = 0; |
| + |
| + |
| + /* now we will go through the whole log sector by sector, until |
| + * we find a valid, active log sequence, or reach the end of the |
| + * log buffer */ |
| + for (;;) { |
| + uint64_t curr_seq = 0; |
| + VHDXLogSequence current = { 0 }; |
| + |
| + tail = curr_log.read; |
| + |
| + ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, |
| + &seq_valid, &hdr); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + |
| + if (seq_valid) { |
| + current.valid = true; |
| + current.log = curr_log; |
| + current.log.read = tail; |
| + current.log.write = curr_log.read; |
| + current.count = 1; |
| + current.hdr = hdr; |
| + |
| + |
| + for (;;) { |
| + ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq, |
| + &seq_valid, &hdr); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + if (seq_valid == false) { |
| + break; |
| + } |
| + current.log.write = curr_log.read; |
| + current.count++; |
| + |
| + curr_seq = hdr.sequence_number; |
| + } |
| + } |
| + |
| + if (current.valid) { |
| + if (candidate.valid == false || |
| + current.hdr.sequence_number > candidate.hdr.sequence_number) { |
| + candidate = current; |
| + } |
| + } |
| + |
| + if (curr_log.read < tail) { |
| + break; |
| + } |
| + } |
| + |
| + *logs = candidate; |
| + |
| + if (candidate.valid) { |
| + /* this is the next sequence number, for writes */ |
| + s->log.sequence = candidate.hdr.sequence_number + 1; |
| + } |
| + |
| + |
| +exit: |
| + return ret; |
| +} |
| + |
| +/* Parse the replay log. Per the VHDX spec, if the log is present |
| + * it must be replayed prior to opening the file, even read-only. |
| + * |
| + * If read-only, we must replay the log in RAM (or refuse to open |
| + * a dirty VHDX file read-only) */ |
| +int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed) |
| +{ |
| + int ret = 0; |
| + VHDXHeader *hdr; |
| + VHDXLogSequence logs = { 0 }; |
| + |
| + hdr = s->headers[s->curr_header]; |
| + |
| + *flushed = false; |
| + |
| + /* s->log.hdr is freed in vhdx_close() */ |
| + if (s->log.hdr == NULL) { |
| + s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader)); |
| + } |
| + |
| + s->log.offset = hdr->log_offset; |
| + s->log.length = hdr->log_length; |
| + |
| + if (s->log.offset < VHDX_LOG_MIN_SIZE || |
| + s->log.offset % VHDX_LOG_MIN_SIZE) { |
| + ret = -EINVAL; |
| + goto exit; |
| + } |
| + |
| + /* per spec, only log version of 0 is supported */ |
| + if (hdr->log_version != 0) { |
| + ret = -EINVAL; |
| + goto exit; |
| + } |
| + |
| + /* If either the log guid, or log length is zero, |
| + * then a replay log is not present */ |
| + if (guid_eq(hdr->log_guid, zero_guid)) { |
| + goto exit; |
| + } |
| + |
| + if (hdr->log_length == 0) { |
| + goto exit; |
| + } |
| + |
| + if (hdr->log_length % VHDX_LOG_MIN_SIZE) { |
| + ret = -EINVAL; |
| + goto exit; |
| + } |
| + |
| + |
| + /* The log is present, we need to find if and where there is an active |
| + * sequence of valid entries present in the log. */ |
| + |
| + ret = vhdx_log_search(bs, s, &logs); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + |
| + if (logs.valid) { |
| + /* now flush the log */ |
| + ret = vhdx_log_flush(bs, s, &logs); |
| + if (ret < 0) { |
| + goto exit; |
| + } |
| + *flushed = true; |
| + } |
| + |
| + |
| +exit: |
| + return ret; |
| +} |
| + |
| + |
| diff --git a/block/vhdx.c b/block/vhdx.c |
| index 7d697e4..b552dde 100644 |
| |
| |
| @@ -735,58 +735,22 @@ exit: |
| return ret; |
| } |
| |
| -/* Parse the replay log. Per the VHDX spec, if the log is present |
| - * it must be replayed prior to opening the file, even read-only. |
| - * |
| - * If read-only, we must replay the log in RAM (or refuse to open |
| - * a dirty VHDX file read-only */ |
| -static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s) |
| -{ |
| - int ret = 0; |
| - int i; |
| - VHDXHeader *hdr; |
| - |
| - hdr = s->headers[s->curr_header]; |
| - |
| - /* either the log guid, or log length is zero, |
| - * then a replay log is present */ |
| - for (i = 0; i < sizeof(hdr->log_guid.data4); i++) { |
| - ret |= hdr->log_guid.data4[i]; |
| - } |
| - if (hdr->log_guid.data1 == 0 && |
| - hdr->log_guid.data2 == 0 && |
| - hdr->log_guid.data3 == 0 && |
| - ret == 0) { |
| - goto exit; |
| - } |
| - |
| - /* per spec, only log version of 0 is supported */ |
| - if (hdr->log_version != 0) { |
| - ret = -EINVAL; |
| - goto exit; |
| - } |
| - |
| - if (hdr->log_length == 0) { |
| - goto exit; |
| - } |
| - |
| - /* We currently do not support images with logs to replay */ |
| - ret = -ENOTSUP; |
| - |
| -exit: |
| - return ret; |
| -} |
| - |
| |
| static void vhdx_close(BlockDriverState *bs) |
| { |
| BDRVVHDXState *s = bs->opaque; |
| qemu_vfree(s->headers[0]); |
| + s->headers[0] = NULL; |
| qemu_vfree(s->headers[1]); |
| + s->headers[1] = NULL; |
| qemu_vfree(s->bat); |
| + s->bat = NULL; |
| qemu_vfree(s->parent_entries); |
| + s->parent_entries = NULL; |
| migrate_del_blocker(s->migration_blocker); |
| error_free(s->migration_blocker); |
| + qemu_vfree(s->log.hdr); |
| + s->log.hdr = NULL; |
| } |
| |
| static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, |
| @@ -797,6 +761,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, |
| uint32_t i; |
| uint64_t signature; |
| uint32_t data_blocks_cnt, bitmap_blocks_cnt; |
| + bool log_flushed = false; |
| |
| |
| s->bat = NULL; |
| @@ -820,24 +785,25 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, |
| vhdx_guid_generate(&s->session_guid); |
| |
| ret = vhdx_parse_header(bs, s); |
| - if (ret) { |
| + if (ret < 0) { |
| goto fail; |
| } |
| |
| - ret = vhdx_parse_log(bs, s); |
| - if (ret) { |
| + ret = vhdx_parse_log(bs, s, &log_flushed); |
| + if (ret < 0) { |
| goto fail; |
| } |
| |
| ret = vhdx_open_region_tables(bs, s); |
| - if (ret) { |
| + if (ret < 0) { |
| goto fail; |
| } |
| |
| ret = vhdx_parse_metadata(bs, s); |
| - if (ret) { |
| + if (ret < 0) { |
| goto fail; |
| } |
| + |
| s->block_size = s->params.block_size; |
| |
| /* the VHDX spec dictates that virtual_disk_size is always a multiple of |
| @@ -897,10 +863,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, |
| |
| return 0; |
| fail: |
| - qemu_vfree(s->headers[0]); |
| - qemu_vfree(s->headers[1]); |
| - qemu_vfree(s->bat); |
| - qemu_vfree(s->parent_entries); |
| + vhdx_close(bs); |
| return ret; |
| } |
| |
| diff --git a/block/vhdx.h b/block/vhdx.h |
| index 81785e5..b150ad1 100644 |
| |
| |
| @@ -326,7 +326,11 @@ typedef struct VHDXMetadataEntries { |
| typedef struct VHDXLogEntries { |
| uint64_t offset; |
| uint64_t length; |
| - uint32_t head; |
| + uint32_t write; |
| + uint32_t read; |
| + VHDXLogEntryHeader *hdr; |
| + void *desc_buffer; |
| + uint64_t sequence; |
| uint32_t tail; |
| } VHDXLogEntries; |
| |
| @@ -383,6 +387,7 @@ uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, |
| |
| bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); |
| |
| +int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed); |
| |
| static inline void leguid_to_cpus(MSGUID *guid) |
| { |
| -- |
| 1.7.1 |
| |