diff --git a/copy/xfs_copy.c b/copy/xfs_copy.c index 9986fbf..9f2f99d 100644 --- a/copy/xfs_copy.c +++ b/copy/xfs_copy.c @@ -684,6 +684,16 @@ main(int argc, char **argv) sb = &mbuf.m_sb; libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbp)); + /* + * For now, V5 superblock filesystems are not supported without -d; + * we do not have the infrastructure yet to fix CRCs when a new UUID + * is generated. + */ + if (xfs_sb_version_hascrc(sb) && !duplicate) { + do_log(_("%s: Cannot yet copy V5 fs without '-d'\n"), progname); + exit(1); + } + mp = libxfs_mount(&mbuf, sb, xargs.ddev, xargs.logdev, xargs.rtdev, 0); if (mp == NULL) { do_log(_("%s: %s filesystem failed to initialize\n" @@ -957,7 +967,13 @@ main(int argc, char **argv) ((char *)btree_buf.data + pos - btree_buf.position); - ASSERT(be32_to_cpu(block->bb_magic) == XFS_ABTB_MAGIC); + if (be32_to_cpu(block->bb_magic) != + (xfs_sb_version_hascrc(&mp->m_sb) ? + XFS_ABTB_CRC_MAGIC : XFS_ABTB_MAGIC)) { + do_log(_("Bad btree magic 0x%x\n"), + be32_to_cpu(block->bb_magic)); + exit(1); + } if (be16_to_cpu(block->bb_level) == 0) break; diff --git a/db/attr.c b/db/attr.c index 740d564..caa154e 100644 --- a/db/attr.c +++ b/db/attr.c @@ -170,7 +170,7 @@ attr3_leaf_entries_count( struct xfs_attr3_leafblock *leaf = obj; ASSERT(startoff == 0); - if (be16_to_cpu(leaf->hdr.info.hdr.magic) != XFS_ATTR_LEAF_MAGIC) + if (be16_to_cpu(leaf->hdr.info.hdr.magic) != XFS_ATTR3_LEAF_MAGIC) return 0; return be16_to_cpu(leaf->hdr.count); } diff --git a/db/bit.c b/db/bit.c index ca57d31..e8adab3 100644 --- a/db/bit.c +++ b/db/bit.c @@ -128,57 +128,41 @@ getbitval( return rval; } +/* + * The input data can be 8, 16, 32, and 64 sized numeric values + * aligned on a byte boundary, or odd sized numbers stored on odd + * aligned offset (for example the bmbt fields). + * + * The input data sent to this routine has been converted to big endian + * and has been adjusted in the array so that the first input bit is to + * be written in the first bit in the output. 
+ * + * If the field length and the output buffer are byte aligned, then use + * memcpy from the input to the output, but if either entries are not byte + * aligned, then loop over the entire bit range reading the input value + * and set/clear the matching bit in the output. + * + * example when ibuf is not multiple of a byte in length: + * + * ibuf: | BBBBBBBB | bbbxxxxx | + * \\\\\\\\--\\\\ + * obuf+bitoff: | xBBBBBBB | Bbbbxxxx | + * + */ void setbitval( - void *obuf, /* buffer to write into */ - int bitoff, /* bit offset of where to write */ - int nbits, /* number of bits to write */ - void *ibuf) /* source bits */ + void *obuf, /* start of buffer to write into */ + int bitoff, /* bit offset into the output buffer */ + int nbits, /* number of bits to write */ + void *ibuf) /* source bits */ { - char *in = (char *)ibuf; - char *out = (char *)obuf; - - int bit; - -#if BYTE_ORDER == LITTLE_ENDIAN - int big = 0; -#else - int big = 1; -#endif - - /* only need to swap LE integers */ - if (big || (nbits!=16 && nbits!=32 && nbits!=64) ) { - /* We don't have type info, so we can only assume - * that 2,4 & 8 byte values are integers. sigh. - */ - - /* byte aligned ? 
*/ - if (bitoff%NBBY) { - /* no - bit copy */ - for (bit=0; bitsb_magicnum != XFS_SB_MAGIC) { fprintf(stderr, _("%s: %s is not a valid XFS filesystem (unexpected SB magic number 0x%08x)\n"), progname, fsdevice, sbp->sb_magicnum); - if (!force) + if (!force) { + fprintf(stderr, _("Use -F to force a read attempt.\n")); exit(EXIT_FAILURE); + } } mp = libxfs_mount(&xmount, sbp, x.ddev, x.logdev, x.rtdev, diff --git a/db/io.c b/db/io.c index 123214d..9a787c8 100644 --- a/db/io.c +++ b/db/io.c @@ -449,9 +449,7 @@ write_cur_bbs(void) /* re-read buffer from disk */ - ret = libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp, - iocur_top->bbmap->b, iocur_top->bbmap->nmaps, - 0); + ret = libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp, 0); if (ret != 0) dbprintf(_("read error: %s\n"), strerror(ret)); } @@ -523,10 +521,11 @@ set_cur( } /* - * keep the buffer even if the verifier says it is corrupted. + * Keep the buffer even if the verifier says it is corrupted. * We're a diagnostic tool, after all. */ - if (!bp || (bp->b_error && bp->b_error != EFSCORRUPTED)) + if (!bp || (bp->b_error && bp->b_error != EFSCORRUPTED && + bp->b_error != EFSBADCRC)) return; iocur_top->buf = bp->b_addr; iocur_top->bp = bp; diff --git a/db/io.h b/db/io.h index 4f24c83..ad39bee 100644 --- a/db/io.h +++ b/db/io.h @@ -41,6 +41,7 @@ typedef struct iocur { int ino_crc_ok:1; int ino_buf:1; int dquot_buf:1; + int need_crc:1; } iocur_t; #define DB_RING_ADD 1 /* add to ring on set_cur */ @@ -66,6 +67,6 @@ static inline bool iocur_crc_valid() { return (iocur_top->bp && - iocur_top->bp->b_error != EFSCORRUPTED && + iocur_top->bp->b_error != EFSBADCRC && (!iocur_top->ino_buf || iocur_top->ino_crc_ok)); } diff --git a/db/metadump.c b/db/metadump.c index 117dc42..09bb85a 100644 --- a/db/metadump.c +++ b/db/metadump.c @@ -145,6 +145,8 @@ print_progress(const char *fmt, ...) * even if the dump is exactly aligned, the last index will be full of * zeros. 
If the last index entry is non-zero, the dump is incomplete. * Correspondingly, the last chunk will have a count < num_indicies. + * + * Return 0 for success, -errno for failure. */ static int @@ -156,49 +158,88 @@ write_index(void) metablock->mb_count = cpu_to_be16(cur_index); if (fwrite(metablock, (cur_index + 1) << BBSHIFT, 1, outf) != 1) { print_warning("error writing to file: %s", strerror(errno)); - return 0; + return -errno; } memset(block_index, 0, num_indicies * sizeof(__be64)); cur_index = 0; - return 1; + return 0; +} + +/* + * Return 0 for success, -errno for failure. + */ +static int +write_buf_segment( + char *data, + __int64_t off, + int len) +{ + int i; + int ret; + + for (i = 0; i < len; i++, off++, data += BBSIZE) { + block_index[cur_index] = cpu_to_be64(off); + memcpy(&block_buffer[cur_index << BBSHIFT], data, BBSIZE); + if (++cur_index == num_indicies) { + ret = write_index(); + if (ret) + return -EIO; + } + } + return 0; } +/* + * we want to preserve the state of the metadata in the dump - whether it is + * intact or corrupt, so even if the buffer has a verifier attached to it we + * don't want to run it prior to writing the buffer to the metadump image. + * + * The only reason for running the verifier is to recalculate the CRCs on a + * buffer that has been obfuscated. i.e. a buffer that metadump modified itself. + * In this case, we only run the verifier if the buffer was not corrupt to begin + * with so that we don't accidentally correct buffers with CRC or errors in them + * when we are obfuscating them. + */ static int write_buf( iocur_t *buf) { - char *data; - __int64_t off; + struct xfs_buf *bp = buf->bp; int i; + int ret; /* * Run the write verifier to recalculate the buffer CRCs and check - * we are writing something valid to disk + * metadump didn't introduce a new corruption. Warn if the verifier + * failed, but still continue to dump it into the output file. 
*/ - if (buf->bp && buf->bp->b_ops) { - buf->bp->b_error = 0; - buf->bp->b_ops->verify_write(buf->bp); - if (buf->bp->b_error) { - fprintf(stderr, - _("%s: write verifer failed on bno 0x%llx/0x%x\n"), - __func__, (long long)buf->bp->b_bn, - buf->bp->b_bcount); - return buf->bp->b_error; + if (buf->need_crc && bp && bp->b_ops && !bp->b_error) { + bp->b_ops->verify_write(bp); + if (bp->b_error) { + print_warning( + "obfuscation corrupted block at bno 0x%llx/0x%x", + (long long)bp->b_bn, bp->b_bcount); } } - for (i = 0, off = buf->bb, data = buf->data; - i < buf->blen; - i++, off++, data += BBSIZE) { - block_index[cur_index] = cpu_to_be64(off); - memcpy(&block_buffer[cur_index << BBSHIFT], data, BBSIZE); - if (++cur_index == num_indicies) { - if (!write_index()) - return 0; + /* handle discontiguous buffers */ + if (!buf->bbmap) { + ret = write_buf_segment(buf->data, buf->bb, buf->blen); + if (ret) + return ret; + } else { + int len = 0; + for (i = 0; i < buf->bbmap->nmaps; i++) { + ret = write_buf_segment(buf->data + BBTOB(len), + buf->bbmap->b[i].bm_bn, + buf->bbmap->b[i].bm_len); + if (ret) + return ret; + len += buf->bbmap->b[i].bm_len; } } - return !seenint(); + return seenint() ? -EINTR : 0; } @@ -227,7 +268,7 @@ scan_btree( rval = !stop_on_read_error; goto pop_out; } - if (!write_buf(iocur_top)) + if (write_buf(iocur_top)) goto pop_out; if (!(*func)(iocur_top->data, agno, agbno, level - 1, btype, arg)) @@ -974,16 +1015,23 @@ obfuscate_sf_dir( } } +/* + * The pathname may not be null terminated. It may be terminated by the end of + * a buffer or inode literal area, and the start of the next region contains + * unknown data. Therefore, when we get to the last component of the symlink, we + * cannot assume that strlen() will give us the right result. Hence we need to + * track the remaining pathname length and use that instead. 
+ */ static void obfuscate_path_components( char *buf, __uint64_t len) { - uchar_t *comp; + uchar_t *comp = (uchar_t *)buf; + uchar_t *end = comp + len; xfs_dahash_t hash; - comp = (uchar_t *)buf; - while (comp < (uchar_t *)buf + len) { + while (comp < end) { char *slash; int namelen; @@ -991,7 +1039,7 @@ obfuscate_path_components( slash = strchr((char *)comp, '/'); if (!slash) { /* last (or single) component */ - namelen = strlen((char *)comp); + namelen = strnlen((char *)comp, len); hash = libxfs_da_hashname(comp, namelen); obfuscate_name(hash, namelen, comp); break; @@ -1000,11 +1048,13 @@ obfuscate_path_components( /* handle leading or consecutive slashes */ if (!namelen) { comp++; + len--; continue; } hash = libxfs_da_hashname(comp, namelen); obfuscate_name(hash, namelen, comp); comp += namelen + 1; + len -= namelen + 1; } } @@ -1080,24 +1130,11 @@ obfuscate_sf_attr( } } -/* - * dir_data structure is used to track multi-fsblock dir2 blocks between extent - * processing calls. - */ - -static struct dir_data_s { - int end_of_data; - int block_index; - int offset_to_entry; - int bad_block; -} dir_data; - static void -obfuscate_dir_data_blocks( - char *block, - xfs_dfiloff_t offset, - xfs_dfilblks_t count, - int is_block_format) +obfuscate_dir_data_block( + char *block, + xfs_dfiloff_t offset, + int is_block_format) { /* * we have to rely on the fileoffset and signature of the block to @@ -1105,133 +1142,105 @@ obfuscate_dir_data_blocks( * for multi-fsblock dir blocks, if a name crosses an extent boundary, * ignore it and continue. 
*/ - int c; - int dir_offset; - char *ptr; - char *endptr; - - if (is_block_format && count != mp->m_dirblkfsbs) - return; /* too complex to handle this rare case */ - - for (c = 0, endptr = block; c < count; c++) { - - if (dir_data.block_index == 0) { - int wantmagic; - struct xfs_dir2_data_hdr *datahdr; - - datahdr = (struct xfs_dir2_data_hdr *)block; - - if (offset % mp->m_dirblkfsbs != 0) - return; /* corrupted, leave it alone */ - - dir_data.bad_block = 0; - - if (is_block_format) { - xfs_dir2_leaf_entry_t *blp; - xfs_dir2_block_tail_t *btp; - - btp = xfs_dir2_block_tail_p(mp, datahdr); - blp = xfs_dir2_block_leaf_p(btp); - if ((char *)blp > (char *)btp) - blp = (xfs_dir2_leaf_entry_t *)btp; - - dir_data.end_of_data = (char *)blp - block; - wantmagic = XFS_DIR2_BLOCK_MAGIC; - } else { /* leaf/node format */ - dir_data.end_of_data = mp->m_dirblkfsbs << - mp->m_sb.sb_blocklog; - wantmagic = XFS_DIR2_DATA_MAGIC; - } - dir_data.offset_to_entry = - xfs_dir3_data_entry_offset(datahdr); + int dir_offset; + char *ptr; + char *endptr; + int end_of_data; + int wantmagic; + struct xfs_dir2_data_hdr *datahdr; + + datahdr = (struct xfs_dir2_data_hdr *)block; + + if (offset % mp->m_dirblkfsbs != 0) + return; /* corrupted, leave it alone */ + + if (is_block_format) { + xfs_dir2_leaf_entry_t *blp; + xfs_dir2_block_tail_t *btp; + + btp = xfs_dir2_block_tail_p(mp, datahdr); + blp = xfs_dir2_block_leaf_p(btp); + if ((char *)blp > (char *)btp) + blp = (xfs_dir2_leaf_entry_t *)btp; + + end_of_data = (char *)blp - block; + if (xfs_sb_version_hascrc(&mp->m_sb)) + wantmagic = XFS_DIR3_BLOCK_MAGIC; + else + wantmagic = XFS_DIR2_BLOCK_MAGIC; + } else { /* leaf/node format */ + end_of_data = mp->m_dirblkfsbs << mp->m_sb.sb_blocklog; + if (xfs_sb_version_hascrc(&mp->m_sb)) + wantmagic = XFS_DIR3_DATA_MAGIC; + else + wantmagic = XFS_DIR2_DATA_MAGIC; + } - if (be32_to_cpu(datahdr->magic) != wantmagic) { - if (show_warnings) - print_warning("invalid magic in dir " - "inode %llu block %ld", 
- (long long)cur_ino, - (long)offset); - dir_data.bad_block = 1; - } - } - dir_data.block_index++; - if (dir_data.block_index == mp->m_dirblkfsbs) - dir_data.block_index = 0; + if (be32_to_cpu(datahdr->magic) != wantmagic) { + if (show_warnings) + print_warning( + "invalid magic in dir inode %llu block %ld", + (long long)cur_ino, (long)offset); + return; + } - if (dir_data.bad_block) - continue; + dir_offset = xfs_dir3_data_entry_offset(datahdr); + ptr = block + dir_offset; + endptr = block + mp->m_sb.sb_blocksize; - dir_offset = (dir_data.block_index << mp->m_sb.sb_blocklog) + - dir_data.offset_to_entry; - - ptr = endptr + dir_data.offset_to_entry; - endptr += mp->m_sb.sb_blocksize; - - while (ptr < endptr && dir_offset < dir_data.end_of_data) { - xfs_dir2_data_entry_t *dep; - xfs_dir2_data_unused_t *dup; - int length; - - dup = (xfs_dir2_data_unused_t *)ptr; - - if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { - int length = be16_to_cpu(dup->length); - if (dir_offset + length > dir_data.end_of_data || - length == 0 || (length & - (XFS_DIR2_DATA_ALIGN - 1))) { - if (show_warnings) - print_warning("invalid length " - "for dir free space in " - "inode %llu", - (long long)cur_ino); - dir_data.bad_block = 1; - break; - } - if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != - dir_offset) { - dir_data.bad_block = 1; - break; - } - dir_offset += length; - ptr += length; - if (dir_offset >= dir_data.end_of_data || - ptr >= endptr) - break; - } + while (ptr < endptr && dir_offset < end_of_data) { + xfs_dir2_data_entry_t *dep; + xfs_dir2_data_unused_t *dup; + int length; - dep = (xfs_dir2_data_entry_t *)ptr; - length = xfs_dir3_data_entsize(mp, dep->namelen); + dup = (xfs_dir2_data_unused_t *)ptr; - if (dir_offset + length > dir_data.end_of_data || - ptr + length > endptr) { + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + int length = be16_to_cpu(dup->length); + if (dir_offset + length > end_of_data || + !length || (length & (XFS_DIR2_DATA_ALIGN - 
1))) { if (show_warnings) - print_warning("invalid length for " - "dir entry name in inode %llu", + print_warning( + "invalid length for dir free space in inode %llu", (long long)cur_ino); - break; + return; } - if (be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) != - dir_offset) { - dir_data.bad_block = 1; - break; - } - generate_obfuscated_name(be64_to_cpu(dep->inumber), - dep->namelen, &dep->name[0]); + if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != + dir_offset) + return; dir_offset += length; ptr += length; + if (dir_offset >= end_of_data || ptr >= endptr) + return; + } + + dep = (xfs_dir2_data_entry_t *)ptr; + length = xfs_dir3_data_entsize(mp, dep->namelen); + + if (dir_offset + length > end_of_data || + ptr + length > endptr) { + if (show_warnings) + print_warning( + "invalid length for dir entry name in inode %llu", + (long long)cur_ino); + return; } - dir_data.offset_to_entry = dir_offset & - (mp->m_sb.sb_blocksize - 1); + if (be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) != + dir_offset) + return; + generate_obfuscated_name(be64_to_cpu(dep->inumber), + dep->namelen, &dep->name[0]); + dir_offset += length; + ptr += length; } } static void -obfuscate_symlink_blocks( - char *block, - xfs_dfilblks_t count) +obfuscate_symlink_block( + char *block) { - count <<= mp->m_sb.sb_blocklog; - obfuscate_path_components(block, count); + /* XXX: need to handle CRC headers */ + obfuscate_path_components(block, mp->m_sb.sb_blocksize); } #define MAX_REMOTE_VALS 4095 @@ -1252,86 +1261,227 @@ add_remote_vals( blockidx++; length -= XFS_LBSIZE(mp); } + + if (attr_data.remote_val_count >= MAX_REMOTE_VALS) { + print_warning( +"Overflowed attr obfuscation array. 
No longer obfuscating remote attrs."); + } } static void -obfuscate_attr_blocks( +obfuscate_attr_block( char *block, - xfs_dfiloff_t offset, - xfs_dfilblks_t count) + xfs_dfiloff_t offset) { xfs_attr_leafblock_t *leaf; - int c; int i; int nentries; xfs_attr_leaf_entry_t *entry; xfs_attr_leaf_name_local_t *local; xfs_attr_leaf_name_remote_t *remote; - for (c = 0; c < count; c++, offset++, block += XFS_LBSIZE(mp)) { + leaf = (xfs_attr_leafblock_t *)block; - leaf = (xfs_attr_leafblock_t *)block; - - if (be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC) { - for (i = 0; i < attr_data.remote_val_count; i++) { - if (attr_data.remote_vals[i] == offset) - memset(block, 0, XFS_LBSIZE(mp)); - } - continue; + if (be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC) { + for (i = 0; i < attr_data.remote_val_count; i++) { + /* XXX: need to handle CRC headers */ + if (attr_data.remote_vals[i] == offset) + memset(block, 0, XFS_LBSIZE(mp)); } + return; + } - nentries = be16_to_cpu(leaf->hdr.count); - if (nentries * sizeof(xfs_attr_leaf_entry_t) + - sizeof(xfs_attr_leaf_hdr_t) > XFS_LBSIZE(mp)) { + nentries = be16_to_cpu(leaf->hdr.count); + if (nentries * sizeof(xfs_attr_leaf_entry_t) + + sizeof(xfs_attr_leaf_hdr_t) > XFS_LBSIZE(mp)) { + if (show_warnings) + print_warning("invalid attr count in inode %llu", + (long long)cur_ino); + return; + } + + for (i = 0, entry = &leaf->entries[0]; i < nentries; i++, entry++) { + if (be16_to_cpu(entry->nameidx) > XFS_LBSIZE(mp)) { if (show_warnings) - print_warning("invalid attr count in inode %llu", + print_warning( + "invalid attr nameidx in inode %llu", (long long)cur_ino); - continue; + break; } - - for (i = 0, entry = &leaf->entries[0]; i < nentries; - i++, entry++) { - if (be16_to_cpu(entry->nameidx) > XFS_LBSIZE(mp)) { + if (entry->flags & XFS_ATTR_LOCAL) { + local = xfs_attr3_leaf_name_local(leaf, i); + if (local->namelen == 0) { if (show_warnings) - print_warning("invalid attr nameidx " - "in inode %llu", - (long 
long)cur_ino); + print_warning( + "zero length for attr name in inode %llu", + (long long)cur_ino); break; } - if (entry->flags & XFS_ATTR_LOCAL) { - local = xfs_attr3_leaf_name_local(leaf, i); - if (local->namelen == 0) { - if (show_warnings) - print_warning("zero length for " - "attr name in inode %llu", - (long long)cur_ino); - break; - } - generate_obfuscated_name(0, local->namelen, - &local->nameval[0]); - memset(&local->nameval[local->namelen], 0, - be16_to_cpu(local->valuelen)); - } else { - remote = xfs_attr3_leaf_name_remote(leaf, i); - if (remote->namelen == 0 || - remote->valueblk == 0) { - if (show_warnings) - print_warning("invalid attr " - "entry in inode %llu", - (long long)cur_ino); - break; - } - generate_obfuscated_name(0, remote->namelen, - &remote->name[0]); - add_remote_vals(be32_to_cpu(remote->valueblk), + generate_obfuscated_name(0, local->namelen, + &local->nameval[0]); + memset(&local->nameval[local->namelen], 0, + be16_to_cpu(local->valuelen)); + } else { + remote = xfs_attr3_leaf_name_remote(leaf, i); + if (remote->namelen == 0 || remote->valueblk == 0) { + if (show_warnings) + print_warning( + "invalid attr entry in inode %llu", + (long long)cur_ino); + break; + } + generate_obfuscated_name(0, remote->namelen, + &remote->name[0]); + add_remote_vals(be32_to_cpu(remote->valueblk), be32_to_cpu(remote->valuelen)); + } + } +} + +static int +process_single_fsb_objects( + xfs_dfiloff_t o, + xfs_dfsbno_t s, + xfs_dfilblks_t c, + typnm_t btype, + xfs_dfiloff_t last) +{ + char *dp; + int ret = 0; + int i; + + for (i = 0; i < c; i++) { + push_cur(); + set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, s), blkbb, + DB_RING_IGN, NULL); + + if (!iocur_top->data) { + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, s); + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, s); + + print_warning("cannot read %s block %u/%u (%llu)", + typtab[btype].name, agno, agbno, s); + if (stop_on_read_error) + ret = -EIO; + goto out_pop; + + } + + if (dont_obfuscate) + goto write; + + dp 
= iocur_top->data; + switch (btype) { + case TYP_DIR2: + if (o >= mp->m_dirleafblk) + break; + + obfuscate_dir_data_block(dp, o, + last == mp->m_dirblkfsbs); + iocur_top->need_crc = 1; + break; + case TYP_SYMLINK: + obfuscate_symlink_block(dp); + iocur_top->need_crc = 1; + break; + case TYP_ATTR: + obfuscate_attr_block(dp, o); + iocur_top->need_crc = 1; + break; + default: + break; + } + +write: + ret = write_buf(iocur_top); +out_pop: + pop_cur(); + if (ret) + break; + o++; + s++; + } + + return ret; +} + +/* + * Static map to aggregate multiple extents into a single directory block. + */ +static struct bbmap mfsb_map; +static int mfsb_length; + +static int +process_multi_fsb_objects( + xfs_dfiloff_t o, + xfs_dfsbno_t s, + xfs_dfilblks_t c, + typnm_t btype, + xfs_dfiloff_t last) +{ + int ret = 0; + + switch (btype) { + case TYP_DIR2: + break; + default: + print_warning("bad type for multi-fsb object %d", btype); + return -EINVAL; + } + + while (c > 0) { + unsigned int bm_len; + + if (mfsb_length + c >= mp->m_dirblkfsbs) { + bm_len = mp->m_dirblkfsbs - mfsb_length; + mfsb_length = 0; + } else { + mfsb_length += c; + bm_len = c; + } + + mfsb_map.b[mfsb_map.nmaps].bm_bn = XFS_FSB_TO_DADDR(mp, s); + mfsb_map.b[mfsb_map.nmaps].bm_len = XFS_FSB_TO_BB(mp, bm_len); + mfsb_map.nmaps++; + + if (mfsb_length == 0) { + push_cur(); + set_cur(&typtab[btype], 0, 0, DB_RING_IGN, &mfsb_map); + if (!iocur_top->data) { + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, s); + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, s); + + print_warning("cannot read %s block %u/%u (%llu)", + typtab[btype].name, agno, agbno, s); + if (stop_on_read_error) + ret = -1; + goto out_pop; + + } + + if (dont_obfuscate || o >= mp->m_dirleafblk) { + ret = write_buf(iocur_top); + goto out_pop; } + + obfuscate_dir_data_block(iocur_top->data, o, + last == mp->m_dirblkfsbs); + iocur_top->need_crc = 1; + ret = write_buf(iocur_top); +out_pop: + pop_cur(); + mfsb_map.nmaps = 0; + if (ret) + break; } + c -= bm_len; + s += 
bm_len; } + + return ret; } /* inode copy routines */ - static int process_bmbt_reclist( xfs_bmbt_rec_t *rp, @@ -1346,6 +1496,7 @@ process_bmbt_reclist( xfs_dfiloff_t last; xfs_agnumber_t agno; xfs_agblock_t agbno; + int error; if (btype == TYP_DATA) return 1; @@ -1407,44 +1558,14 @@ process_bmbt_reclist( break; } - push_cur(); - set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, s), c * blkbb, - DB_RING_IGN, NULL); - if (iocur_top->data == NULL) { - print_warning("cannot read %s block %u/%u (%llu)", - typtab[btype].name, agno, agbno, s); - if (stop_on_read_error) { - pop_cur(); - return 0; - } + /* multi-extent blocks require special handling */ + if (btype != TYP_DIR2 || mp->m_dirblkfsbs == 1) { + error = process_single_fsb_objects(o, s, c, btype, last); } else { - if (!dont_obfuscate) - switch (btype) { - case TYP_DIR2: - if (o < mp->m_dirleafblk) - obfuscate_dir_data_blocks( - iocur_top->data, o, c, - last == mp->m_dirblkfsbs); - break; - - case TYP_SYMLINK: - obfuscate_symlink_blocks( - iocur_top->data, c); - break; - - case TYP_ATTR: - obfuscate_attr_blocks(iocur_top->data, - o, c); - break; - - default: ; - } - if (!write_buf(iocur_top)) { - pop_cur(); - return 0; - } + error = process_multi_fsb_objects(o, s, c, btype, last); } - pop_cur(); + if (error) + return 0; } return 1; @@ -1626,6 +1747,13 @@ process_inode_data( return 1; } +/* + * when we process the inode, we may change the data in the data and/or + * attribute fork if they are in short form and we are obfuscating names. + * In this case we need to recalculate the CRC of the inode, but we should + * only do that if the CRC in the inode is good to begin with. If the crc + * is not ok, we just leave it alone. 
+ */ static int process_inode( xfs_agnumber_t agno, @@ -1633,18 +1761,30 @@ process_inode( xfs_dinode_t *dip) { int success; + bool crc_was_ok = false; /* no recalc by default */ + bool need_new_crc = false; success = 1; cur_ino = XFS_AGINO_TO_INO(mp, agno, agino); + /* we only care about crc recalculation if we are obfuscating names. */ + if (!dont_obfuscate) { + crc_was_ok = xfs_verify_cksum((char *)dip, + mp->m_sb.sb_inodesize, + offsetof(struct xfs_dinode, di_crc)); + } + /* copy appropriate data fork metadata */ switch (be16_to_cpu(dip->di_mode) & S_IFMT) { case S_IFDIR: - memset(&dir_data, 0, sizeof(dir_data)); success = process_inode_data(dip, TYP_DIR2); + if (dip->di_format == XFS_DINODE_FMT_LOCAL) + need_new_crc = 1; break; case S_IFLNK: success = process_inode_data(dip, TYP_SYMLINK); + if (dip->di_format == XFS_DINODE_FMT_LOCAL) + need_new_crc = 1; break; case S_IFREG: success = process_inode_data(dip, TYP_DATA); @@ -1659,6 +1799,7 @@ process_inode( attr_data.remote_val_count = 0; switch (dip->di_aformat) { case XFS_DINODE_FMT_LOCAL: + need_new_crc = 1; if (!dont_obfuscate) obfuscate_sf_attr(dip); break; @@ -1673,6 +1814,9 @@ process_inode( } nametable_clear(); } + + if (crc_was_ok && need_new_crc) + xfs_dinode_calc_crc(mp, dip); return success; } @@ -1743,12 +1887,9 @@ copy_inode_chunk( if (!process_inode(agno, agino + i, dip)) goto pop_out; - - /* calculate the new CRC for the inode */ - xfs_dinode_calc_crc(mp, dip); } skip_processing: - if (!write_buf(iocur_top)) + if (write_buf(iocur_top)) goto pop_out; inodes_copied += XFS_INODES_PER_CHUNK; @@ -1866,7 +2007,7 @@ scan_ag( if (stop_on_read_error) goto pop_out; } else { - if (!write_buf(iocur_top)) + if (write_buf(iocur_top)) goto pop_out; } @@ -1881,7 +2022,7 @@ scan_ag( if (stop_on_read_error) goto pop_out; } else { - if (!write_buf(iocur_top)) + if (write_buf(iocur_top)) goto pop_out; } @@ -1896,7 +2037,7 @@ scan_ag( if (stop_on_read_error) goto pop_out; } else { - if (!write_buf(iocur_top)) + if 
(write_buf(iocur_top)) goto pop_out; } @@ -1910,7 +2051,7 @@ scan_ag( if (stop_on_read_error) goto pop_out; } else { - if (!write_buf(iocur_top)) + if (write_buf(iocur_top)) goto pop_out; } @@ -2015,7 +2156,7 @@ copy_log(void) print_warning("cannot read log"); return !stop_on_read_error; } - return write_buf(iocur_top); + return !write_buf(iocur_top); } static int @@ -2121,7 +2262,7 @@ metadump_f( /* write the remaining index */ if (!exitcode) - exitcode = !write_index(); + exitcode = write_index() < 0; if (progress_since_warning) fputc('\n', (outf == stdout) ? stderr : stdout); diff --git a/db/write.c b/db/write.c index 091ddb3..7b34fc0 100644 --- a/db/write.c +++ b/db/write.c @@ -439,55 +439,78 @@ convert_oct( #define NYBBLE(x) (isdigit(x)?(x-'0'):(tolower(x)-'a'+0xa)) +/* + * convert_arg allows input in the following forms: + * + * - A string ("ABTB") whose ASCII value is placed in an array in the order + * matching the input. + * + * - An even number of hex numbers. If the length is greater than 64 bits, + * then the output is an array of bytes whose top nibble is the first hex + * digit in the input, the lower nibble is the second hex digit in the + * input. UUID entries are entered in this manner. + * + * - A decimal or hexadecimal integer to be used with setbitval(). + * + * Numbers that are passed to setbitval() need to be in big endian format and + * are adjusted in the buffer so that the first input bit is to be written to + * the first bit in the output. 
+ */ static char * convert_arg( - char *arg, - int bit_length) + char *arg, + int bit_length) { - int i; - static char *buf = NULL; - char *rbuf; - long long *value; - int alloc_size; - char *ostr; - int octval, ret; + int i; + int alloc_size; + int octval; + int offset; + int ret; + static char *buf = NULL; + char *endp; + char *rbuf; + char *ostr; + __u64 *value; + __u64 val = 0; if (bit_length <= 64) alloc_size = 8; else - alloc_size = (bit_length+7)/8; + alloc_size = (bit_length + 7) / 8; buf = xrealloc(buf, alloc_size); memset(buf, 0, alloc_size); - value = (long long *)buf; + value = (__u64 *)buf; rbuf = buf; if (*arg == '\"') { - /* handle strings */ + /* input a string and output ASCII array of characters */ /* zap closing quote if there is one */ - if ((ostr = strrchr(arg+1, '\"')) != NULL) + ostr = strrchr(arg + 1, '\"'); + if (ostr) *ostr = '\0'; - ostr = arg+1; + ostr = arg + 1; for (i = 0; i < alloc_size; i++) { if (!*ostr) break; - /* do octal */ + /* do octal conversion */ if (*ostr == '\\') { - if (*(ostr+1) >= '0' || *(ostr+1) <= '7') { - ret = convert_oct(ostr+1, &octval); + if (*(ostr + 1) >= '0' || *(ostr + 1) <= '7') { + ret = convert_oct(ostr + 1, &octval); *rbuf++ = octval; - ostr += ret+1; + ostr += ret + 1; continue; } } *rbuf++ = *ostr++; } return buf; - } else if (arg[0] == '#' || ((arg[0] != '-') && strchr(arg,'-'))) { + } + + if (arg[0] == '#' || ((arg[0] != '-') && strchr(arg,'-'))) { /* * handle hex blocks ie * #00112233445566778899aabbccddeeff @@ -496,48 +519,79 @@ convert_arg( * * (but if it starts with "-" assume it's just an integer) */ - int bytes=bit_length/8; + int bytes = bit_length / NBBY; + + /* is this an array of hex numbers? 
*/ + if (bit_length % NBBY) + return NULL; /* skip leading hash */ - if (*arg=='#') arg++; + if (*arg == '#') + arg++; while (*arg && bytes--) { - /* skip hypens */ - while (*arg=='-') arg++; - - /* get first nybble */ - if (!isxdigit((int)*arg)) return NULL; - *rbuf=NYBBLE((int)*arg)<<4; - arg++; - - /* skip more hyphens */ - while (*arg=='-') arg++; - - /* get second nybble */ - if (!isxdigit((int)*arg)) return NULL; - *rbuf++|=NYBBLE((int)*arg); - arg++; + /* skip hypens */ + while (*arg == '-') + arg++; + + /* get first nybble */ + if (!isxdigit((int)*arg)) + return NULL; + *rbuf = NYBBLE((int)*arg) << 4; + arg++; + + /* skip more hyphens */ + while (*arg == '-') + arg++; + + /* get second nybble */ + if (!isxdigit((int)*arg)) + return NULL; + *rbuf++ |= NYBBLE((int)*arg); + arg++; } - if (bytes<0&&*arg) return NULL; + if (bytes < 0 && *arg) + return NULL; + return buf; - } else { - /* - * handle integers - */ - *value = strtoll(arg, NULL, 0); - -#if __BYTE_ORDER == BIG_ENDIAN - /* hackery for big endian */ - if (bit_length <= 8) { - rbuf += 7; - } else if (bit_length <= 16) { - rbuf += 6; - } else if (bit_length <= 32) { - rbuf += 4; - } -#endif - return rbuf; } + + /* handle decimal / hexadecimal integers */ + val = strtoll(arg, &endp, 0); + /* return if not a clean number */ + if (*endp != '\0') + return NULL; + + /* Does the value fit into the range of the destination bitfield? 
*/ + if (bit_length < 64 && (val >> bit_length) > 0) /* val >> 64 is UB */ + return NULL; + /* + * If the length of the field is not a multiple of a byte, push + * the bits up in the field, so the most significant field bit is + * the most significant bit in the byte: + * + * before: + * val |----|----|----|----|----|--MM|mmmm|llll| + * after + * val |----|----|----|----|----|MMmm|mmll|ll00| + */ + offset = bit_length % NBBY; + if (offset) + val <<= (NBBY - offset); + + /* + * convert to big endian and copy into the array + * rbuf |----|----|----|----|----|MMmm|mmll|ll00| + */ + *value = cpu_to_be64(val); + + /* + * Align the array to point to the field in the array. + * rbuf = |MMmm|mmll|ll00| + */ + offset = sizeof(__be64) - 1 - ((bit_length - 1) / sizeof(__be64)); + rbuf += offset; + return rbuf; } @@ -550,9 +604,9 @@ write_struct( { const ftattr_t *fa; flist_t *fl; - flist_t *sfl; - int bit_length; - char *buf; + flist_t *sfl; + int bit_length; + char *buf; int parentoffset; if (argc != 2) { diff --git a/db/xfs_metadump.sh b/db/xfs_metadump.sh index 28b04b8..a95d5a5 100755 --- a/db/xfs_metadump.sh +++ b/db/xfs_metadump.sh @@ -5,9 +5,9 @@ OPTS=" " DBOPTS=" " -USAGE="Usage: xfs_metadump [-efogwV] [-m max_extents] [-l logdev] source target" +USAGE="Usage: xfs_metadump [-efFogwV] [-m max_extents] [-l logdev] source target" -while getopts "efgl:m:owV" c +while getopts "efgl:m:owFV" c do case $c in e) OPTS=$OPTS"-e ";; @@ -17,6 +17,7 @@ do w) OPTS=$OPTS"-w ";; f) DBOPTS=$DBOPTS" -f";; l) DBOPTS=$DBOPTS" -l "$OPTARG" ";; + F) DBOPTS=$DBOPTS" -F";; V) xfs_db -p xfs_metadump -V status=$? exit $status @@ -29,7 +30,7 @@ done set -- extra $@ shift $OPTIND case $# in - 2) xfs_db$DBOPTS -F -i -p xfs_metadump -c "metadump$OPTS $2" $1 + 2) xfs_db$DBOPTS -i -p xfs_metadump -c "metadump$OPTS $2" $1 status=$? 
;; *) echo $USAGE 1>&2 diff --git a/growfs/xfs_growfs.c b/growfs/xfs_growfs.c index 2df68fb..fb7eda8 100644 --- a/growfs/xfs_growfs.c +++ b/growfs/xfs_growfs.c @@ -189,7 +189,7 @@ main(int argc, char **argv) usage(); if (iflag && xflag) usage(); - if (dflag + lflag + rflag == 0) + if (dflag + lflag + rflag + mflag == 0) aflag = 1; fs_table_initialise(0, NULL, 0, NULL); @@ -305,12 +305,15 @@ main(int argc, char **argv) drsize -= (drsize % 2); error = 0; - if (dflag | aflag) { + + if (dflag | mflag | aflag) { xfs_growfs_data_t in; if (!mflag) maxpct = geo.imaxpct; - if (!dsize) + if (!dflag && !aflag) /* Only mflag, no data size change */ + dsize = geo.datablocks; + else if (!dsize) dsize = ddsize / (geo.blocksize / BBSIZE); else if (dsize > ddsize / (geo.blocksize / BBSIZE)) { fprintf(stderr, _( diff --git a/include/cache.h b/include/cache.h index 76cb234..0a84c69 100644 --- a/include/cache.h +++ b/include/cache.h @@ -66,7 +66,8 @@ typedef void (*cache_walk_t)(struct cache_node *); typedef struct cache_node * (*cache_node_alloc_t)(cache_key_t); typedef void (*cache_node_flush_t)(struct cache_node *); typedef void (*cache_node_relse_t)(struct cache_node *); -typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int); +typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int, + unsigned int); typedef int (*cache_node_compare_t)(struct cache_node *, cache_key_t); typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *); @@ -112,6 +113,7 @@ struct cache { cache_node_compare_t compare; /* comparison routine */ cache_bulk_relse_t bulkrelse; /* bulk release routine */ unsigned int c_hashsize; /* hash bucket count */ + unsigned int c_hashshift; /* hash key shift */ struct cache_hash *c_hash; /* hash table buckets */ struct cache_mru c_mrus[CACHE_MAX_PRIORITY + 1]; unsigned long long c_misses; /* cache misses */ diff --git a/include/darwin.h b/include/darwin.h index 97b8990..95f865b 100644 --- a/include/darwin.h +++ 
b/include/darwin.h @@ -150,6 +150,7 @@ typedef unsigned char uchar_t; #define ENOATTR 989 /* Attribute not found */ #define EFSCORRUPTED 990 /* Filesystem is corrupted */ +#define EFSBADCRC 991 /* Bad CRC detected */ #define constpp char * const * #define HAVE_FID 1 diff --git a/include/freebsd.h b/include/freebsd.h index 2e1ae49..b51688b 100644 --- a/include/freebsd.h +++ b/include/freebsd.h @@ -45,6 +45,7 @@ #define constpp char * const * #define EFSCORRUPTED 990 /* Filesystem is corrupted */ +#define EFSBADCRC 991 /* Bad CRC detected */ typedef off_t xfs_off_t; typedef off_t off64_t; diff --git a/include/gnukfreebsd.h b/include/gnukfreebsd.h index 1ec291f..2140acd 100644 --- a/include/gnukfreebsd.h +++ b/include/gnukfreebsd.h @@ -36,6 +36,7 @@ #define constpp char * const * #define EFSCORRUPTED 990 /* Filesystem is corrupted */ +#define EFSBADCRC 991 /* Bad CRC detected */ typedef off_t xfs_off_t; typedef __uint64_t xfs_ino_t; diff --git a/include/irix.h b/include/irix.h index a450684..5040451 100644 --- a/include/irix.h +++ b/include/irix.h @@ -52,6 +52,8 @@ typedef char* xfs_caddr_t; #define xfs_flock64 flock64 #define xfs_flock64_t struct flock64 +#define EFSBADCRC 991 /* Bad CRC detected */ + typedef struct xfs_error_injection { __int32_t fd; __int32_t errtag; diff --git a/include/libxfs.h b/include/libxfs.h index 4bf331c..6bc6c94 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -144,6 +144,7 @@ extern void libxfs_device_close (dev_t); extern int libxfs_device_alignment (void); extern void libxfs_report(FILE *); extern void platform_findsizes(char *path, int fd, long long *sz, int *bsz); +extern int platform_nproc(void); /* check or write log footer: specify device, log size in blocks & uuid */ typedef xfs_caddr_t (libxfs_get_block_t)(xfs_caddr_t, int, void *); @@ -364,7 +365,7 @@ enum xfs_buf_flags_t { /* b_flags bits */ #define XFS_BUF_PRIORITY(bp) (cache_node_get_priority( \ (struct cache_node *)(bp))) #define xfs_buf_set_ref(bp,ref) ((void) 0) 
-#define xfs_buf_ioerror(bp,err) (bp)->b_error = (err); +#define xfs_buf_ioerror(bp,err) ((bp)->b_error = (err)) #define xfs_daddr_to_agno(mp,d) \ ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) @@ -392,9 +393,9 @@ extern struct cache_operations libxfs_bcache_operations; #define libxfs_getbuf(dev, daddr, len) \ libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \ (dev), (daddr), (len)) -#define libxfs_getbuf_map(dev, map, nmaps) \ +#define libxfs_getbuf_map(dev, map, nmaps, flags) \ libxfs_trace_getbuf_map(__FUNCTION__, __FILE__, __LINE__, \ - (dev), (map), (nmaps)) + (dev), (map), (nmaps), (flags)) #define libxfs_getbuf_flags(dev, daddr, len, flags) \ libxfs_trace_getbuf_flags(__FUNCTION__, __FILE__, __LINE__, \ (dev), (daddr), (len), (flags)) @@ -412,7 +413,7 @@ extern int libxfs_trace_writebuf(const char *, const char *, int, extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int, struct xfs_buftarg *, xfs_daddr_t, int); extern xfs_buf_t *libxfs_trace_getbuf_map(const char *, const char *, int, - struct xfs_buftarg *, struct xfs_buf_map *, int); + struct xfs_buftarg *, struct xfs_buf_map *, int, int); extern xfs_buf_t *libxfs_trace_getbuf_flags(const char *, const char *, int, struct xfs_buftarg *, xfs_daddr_t, int, unsigned int); extern void libxfs_trace_putbuf (const char *, const char *, int, @@ -427,7 +428,7 @@ extern xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, extern int libxfs_writebuf(xfs_buf_t *, int); extern xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int); extern xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, - struct xfs_buf_map *, int); + struct xfs_buf_map *, int, int); extern xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int, unsigned int); extern void libxfs_putbuf (xfs_buf_t *); @@ -448,8 +449,7 @@ extern void libxfs_putbufr(xfs_buf_t *); extern int libxfs_writebuf_int(xfs_buf_t *, int); extern int libxfs_writebufr(struct xfs_buf *); extern 
int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, xfs_buf_t *, int, int); -extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, - struct xfs_buf_map *, int, int); +extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, int); extern int libxfs_bhash_size; @@ -779,6 +779,20 @@ extern uint32_t crc32c_le(uint32_t crc, unsigned char const *p, size_t len); #include +static inline int +xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset) +{ + return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), + cksum_offset); +} + +static inline void +xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) +{ + xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), + cksum_offset); +} + #define xfs_notice(mp,fmt,args...) cmn_err(CE_NOTE,fmt, ## args) #define xfs_warn(mp,fmt,args...) cmn_err(CE_WARN,fmt, ## args) #define xfs_alert(mp,fmt,args...) cmn_err(CE_ALERT,fmt, ## args) diff --git a/include/linux.h b/include/linux.h index 502fd1f..5586290 100644 --- a/include/linux.h +++ b/include/linux.h @@ -136,6 +136,7 @@ platform_discard_blocks(int fd, uint64_t start, uint64_t len) #define ENOATTR ENODATA /* Attribute not found */ #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ typedef loff_t xfs_off_t; typedef __uint64_t xfs_ino_t; diff --git a/include/xfs_ag.h b/include/xfs_ag.h index 3fc1098..0fdd410 100644 --- a/include/xfs_ag.h +++ b/include/xfs_ag.h @@ -89,6 +89,8 @@ typedef struct xfs_agf { /* structure must be padded to 64 bit alignment */ } xfs_agf_t; +#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc) + #define XFS_AGF_MAGICNUM 0x00000001 #define XFS_AGF_VERSIONNUM 0x00000002 #define XFS_AGF_SEQNO 0x00000004 @@ -167,6 +169,8 @@ typedef struct xfs_agi { /* structure must be padded to 64 bit alignment */ } xfs_agi_t; +#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) + #define XFS_AGI_MAGICNUM 0x00000001 #define XFS_AGI_VERSIONNUM 0x00000002 
#define XFS_AGI_SEQNO 0x00000004 @@ -222,6 +226,8 @@ typedef struct xfs_agfl { __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ } xfs_agfl_t; +#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) + /* * tags for inode radix tree */ diff --git a/include/xfs_dinode.h b/include/xfs_dinode.h index e5869b5..623bbe8 100644 --- a/include/xfs_dinode.h +++ b/include/xfs_dinode.h @@ -89,6 +89,8 @@ typedef struct xfs_dinode { /* structure must be padded to 64 bit alignment */ } xfs_dinode_t; +#define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) + #define DI_MAX_FLUSH 0xffff /* diff --git a/include/xfs_dir2.h b/include/xfs_dir2.h index 9910401..3900130 100644 --- a/include/xfs_dir2.h +++ b/include/xfs_dir2.h @@ -57,6 +57,9 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, uint resblks); +#define S_SHIFT 12 +extern const unsigned char xfs_mode_to_ftype[]; + /* * Direct call from the bmap code, bypassing the generic directory layer. */ diff --git a/include/xfs_format.h b/include/xfs_format.h index 997c770..77f6b8b 100644 --- a/include/xfs_format.h +++ b/include/xfs_format.h @@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr { __be64 sl_lsn; }; +#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc) + /* * The maximum pathlen is 1024 bytes. Since the minimum file system * blocksize is 512 bytes, we can get a max of 3 extents back from diff --git a/include/xfs_sb.h b/include/xfs_sb.h index 35061d4..f7b2fe7 100644 --- a/include/xfs_sb.h +++ b/include/xfs_sb.h @@ -182,6 +182,8 @@ typedef struct xfs_sb { /* must be padded to 64 bit alignment */ } xfs_sb_t; +#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc) + /* * Superblock - on disk version. Must match the in core version above. * Must be padded to 64 bit alignment. 
diff --git a/io/file.c b/io/file.c index db85ffc..73b893f 100644 --- a/io/file.c +++ b/io/file.c @@ -36,7 +36,7 @@ print_fileio( int index, int braces) { - printf(_("%c%03d%c %-14s (%s,%s,%s,%s%s%s%s)\n"), + printf(_("%c%03d%c %-14s (%s,%s,%s,%s%s%s%s%s)\n"), braces? '[' : ' ', index, braces? ']' : ' ', file->name, file->flags & IO_FOREIGN ? _("foreign") : _("xfs"), file->flags & IO_OSYNC ? _("sync") : _("non-sync"), @@ -44,7 +44,8 @@ print_fileio( file->flags & IO_READONLY ? _("read-only") : _("read-write"), file->flags & IO_REALTIME ? _(",real-time") : "", file->flags & IO_APPEND ? _(",append-only") : "", - file->flags & IO_NONBLOCK ? _(",non-block") : ""); + file->flags & IO_NONBLOCK ? _(",non-block") : "", + file->flags & IO_TMPFILE ? _(",tmpfile") : ""); } int diff --git a/io/imap.c b/io/imap.c index 0a4f14e..4f3f883 100644 --- a/io/imap.c +++ b/io/imap.c @@ -67,7 +67,7 @@ imap_init(void) imap_cmd.name = "imap"; imap_cmd.cfunc = imap_f; imap_cmd.argmin = 0; - imap_cmd.argmax = 0; + imap_cmd.argmax = 1; imap_cmd.args = _("[nentries]"); imap_cmd.flags = CMD_NOMAP_OK; imap_cmd.oneline = _("inode map for filesystem of current file"); diff --git a/io/init.c b/io/init.c index ef9e4cb..1e2690e 100644 --- a/io/init.c +++ b/io/init.c @@ -136,7 +136,7 @@ init( pagesize = getpagesize(); gettimeofday(&stopwatch, NULL); - while ((c = getopt(argc, argv, "ac:dFfmp:nrRstVx")) != EOF) { + while ((c = getopt(argc, argv, "ac:dFfmp:nrRstTVx")) != EOF) { switch (c) { case 'a': flags |= IO_APPEND; @@ -179,6 +179,9 @@ init( case 'R': flags |= IO_REALTIME; break; + case 'T': + flags |= IO_TMPFILE; + break; case 'x': expert = 1; break; diff --git a/io/io.h b/io/io.h index 6c3f627..0d2d768 100644 --- a/io/io.h +++ b/io/io.h @@ -35,6 +35,7 @@ #define IO_TRUNC (1<<6) #define IO_FOREIGN (1<<7) #define IO_NONBLOCK (1<<8) +#define IO_TMPFILE (1<<9) /* * Regular file I/O control diff --git a/io/open.c b/io/open.c index cc677e6..c106fa7 100644 --- a/io/open.c +++ b/io/open.c @@ -22,6 +22,22 
@@ #include "init.h" #include "io.h" +#ifndef __O_TMPFILE +#if defined __alpha__ +#define __O_TMPFILE 0100000000 +#elif defined(__hppa__) +#define __O_TMPFILE 040000000 +#elif defined(__sparc__) +#define __O_TMPFILE 0x2000000 +#else +#define __O_TMPFILE 020000000 +#endif +#endif /* __O_TMPFILE */ + +#ifndef O_TMPFILE +#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) +#endif + static cmdinfo_t open_cmd; static cmdinfo_t stat_cmd; static cmdinfo_t close_cmd; @@ -77,13 +93,14 @@ stat_f( int verbose = (argc == 2 && !strcmp(argv[1], "-v")); printf(_("fd.path = \"%s\"\n"), file->name); - printf(_("fd.flags = %s,%s,%s%s%s%s\n"), + printf(_("fd.flags = %s,%s,%s%s%s%s%s\n"), file->flags & IO_OSYNC ? _("sync") : _("non-sync"), file->flags & IO_DIRECT ? _("direct") : _("non-direct"), file->flags & IO_READONLY ? _("read-only") : _("read-write"), file->flags & IO_REALTIME ? _(",real-time") : "", file->flags & IO_APPEND ? _(",append-only") : "", - file->flags & IO_NONBLOCK ? _(",non-block") : ""); + file->flags & IO_NONBLOCK ? _(",non-block") : "", + file->flags & IO_TMPFILE ? 
_(",tmpfile") : ""); if (fstat64(file->fd, &st) < 0) { perror("fstat64"); } else { @@ -143,10 +160,13 @@ openfile( oflags |= O_TRUNC; if (flags & IO_NONBLOCK) oflags |= O_NONBLOCK; + if (flags & IO_TMPFILE) + oflags |= O_TMPFILE; fd = open(path, oflags, mode); if (fd < 0) { - if ((errno == EISDIR) && (oflags & O_RDWR)) { + if (errno == EISDIR && + ((oflags & (O_RDWR|O_TMPFILE)) == O_RDWR)) { /* make it as if we asked for O_RDONLY & try again */ oflags &= ~O_RDWR; oflags |= O_RDONLY; @@ -248,6 +268,7 @@ open_help(void) " -s -- open with O_SYNC\n" " -t -- open with O_TRUNC (truncate the file to zero length if it exists)\n" " -R -- mark the file as a realtime XFS file immediately after opening it\n" +" -T -- open with O_TMPFILE (create a file not visible in the namespace)\n" " Note1: usually read/write direct IO requests must be blocksize aligned;\n" " some kernels, however, allow sectorsize alignment for direct IO.\n" " Note2: the bmap for non-regular files can be obtained provided the file\n" @@ -272,7 +293,7 @@ open_f( return 0; } - while ((c = getopt(argc, argv, "FRacdfm:nrstx")) != EOF) { + while ((c = getopt(argc, argv, "FRTacdfm:nrstx")) != EOF) { switch (c) { case 'F': /* Ignored / deprecated now, handled automatically */ @@ -310,6 +331,9 @@ open_f( case 'x': /* backwards compatibility */ flags |= IO_REALTIME; break; + case 'T': + flags |= IO_TMPFILE; + break; default: return command_usage(&open_cmd); } @@ -318,6 +342,11 @@ open_f( if (optind != argc - 1) return command_usage(&open_cmd); + if ((flags & (IO_READONLY|IO_TMPFILE)) == (IO_READONLY|IO_TMPFILE)) { + fprintf(stderr, _("-T and -r options are incompatible\n")); + return -1; + } + fd = openfile(argv[optind], &geometry, flags, mode); if (fd < 0) return 0; @@ -731,7 +760,7 @@ open_init(void) open_cmd.argmin = 0; open_cmd.argmax = -1; open_cmd.flags = CMD_NOMAP_OK | CMD_NOFILE_OK | CMD_FOREIGN_OK; - open_cmd.args = _("[-acdrstx] [path]"); + open_cmd.args = _("[-acdrstxT] [path]"); open_cmd.oneline = 
_("open the file specified by path"); open_cmd.help = open_help; diff --git a/io/prealloc.c b/io/prealloc.c index 8380646..aba6b44 100644 --- a/io/prealloc.c +++ b/io/prealloc.c @@ -29,6 +29,14 @@ #define FALLOC_FL_PUNCH_HOLE 0x02 #endif +#ifndef FALLOC_FL_COLLAPSE_RANGE +#define FALLOC_FL_COLLAPSE_RANGE 0x08 +#endif + +#ifndef FALLOC_FL_ZERO_RANGE +#define FALLOC_FL_ZERO_RANGE 0x10 +#endif + static cmdinfo_t allocsp_cmd; static cmdinfo_t freesp_cmd; static cmdinfo_t resvsp_cmd; @@ -37,6 +45,8 @@ static cmdinfo_t zero_cmd; #if defined(HAVE_FALLOCATE) static cmdinfo_t falloc_cmd; static cmdinfo_t fpunch_cmd; +static cmdinfo_t fcollapse_cmd; +static cmdinfo_t fzero_cmd; #endif static int @@ -159,8 +169,11 @@ fallocate_f( int mode = 0; int c; - while ((c = getopt(argc, argv, "kp")) != EOF) { + while ((c = getopt(argc, argv, "ckp")) != EOF) { switch (c) { + case 'c': + mode = FALLOC_FL_COLLAPSE_RANGE; + break; case 'k': mode = FALLOC_FL_KEEP_SIZE; break; @@ -203,6 +216,50 @@ fpunch_f( } return 0; } + +static int +fcollapse_f( + int argc, + char **argv) +{ + xfs_flock64_t segment; + int mode = FALLOC_FL_COLLAPSE_RANGE; + + if (!offset_length(argv[1], argv[2], &segment)) + return 0; + + if (fallocate(file->fd, mode, + segment.l_start, segment.l_len)) { + perror("fallocate"); + return 0; + } + return 0; +} + +static int +fzero_f( + int argc, + char **argv) +{ + xfs_flock64_t segment; + int mode = FALLOC_FL_ZERO_RANGE; + int index = 1; + + if (strncmp(argv[index], "-k", 3) == 0) { + mode |= FALLOC_FL_KEEP_SIZE; + index++; + } + + if (!offset_length(argv[index], argv[index + 1], &segment)) + return 0; + + if (fallocate(file->fd, mode, + segment.l_start, segment.l_len)) { + perror("fallocate"); + return 0; + } + return 0; +} #endif /* HAVE_FALLOCATE */ void @@ -263,9 +320,9 @@ prealloc_init(void) falloc_cmd.argmin = 2; falloc_cmd.argmax = -1; falloc_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; - falloc_cmd.args = _("[-k] [-p] off len"); + falloc_cmd.args = _("[-c] [-k] [-p] 
off len"); falloc_cmd.oneline = - _("allocates space associated with part of a file via fallocate"); + _("allocates space associated with part of a file via fallocate"); add_command(&falloc_cmd); fpunch_cmd.name = "fpunch"; @@ -275,7 +332,27 @@ prealloc_init(void) fpunch_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; fpunch_cmd.args = _("off len"); fpunch_cmd.oneline = - _("de-allocates space assocated with part of a file via fallocate"); + _("de-allocates space assocated with part of a file via fallocate"); add_command(&fpunch_cmd); + + fcollapse_cmd.name = "fcollapse"; + fcollapse_cmd.cfunc = fcollapse_f; + fcollapse_cmd.argmin = 2; + fcollapse_cmd.argmax = 2; + fcollapse_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; + fcollapse_cmd.args = _("off len"); + fcollapse_cmd.oneline = + _("de-allocates space and eliminates the hole by shifting extents"); + add_command(&fcollapse_cmd); + + fzero_cmd.name = "fzero"; + fzero_cmd.cfunc = fzero_f; + fzero_cmd.argmin = 2; + fzero_cmd.argmax = 3; + fzero_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; + fzero_cmd.args = _("[-k] off len"); + fzero_cmd.oneline = + _("zeroes space and eliminates holes by preallocating"); + add_command(&fzero_cmd); #endif /* HAVE_FALLOCATE */ } diff --git a/libxfs/cache.c b/libxfs/cache.c index 84d2860..dc69689 100644 --- a/libxfs/cache.c +++ b/libxfs/cache.c @@ -25,6 +25,7 @@ #include #include #include +#include #define CACHE_DEBUG 1 #undef CACHE_DEBUG @@ -61,6 +62,7 @@ cache_init( cache->c_misses = 0; cache->c_maxcount = maxcount; cache->c_hashsize = hashsize; + cache->c_hashshift = libxfs_highbit32(hashsize); cache->hash = cache_operations->hash; cache->alloc = cache_operations->alloc; cache->flush = cache_operations->flush; @@ -343,7 +345,7 @@ cache_node_get( int priority = 0; int purged = 0; - hashidx = cache->hash(key, cache->c_hashsize); + hashidx = cache->hash(key, cache->c_hashsize, cache->c_hashshift); hash = cache->c_hash + hashidx; head = &hash->ch_list; @@ -515,7 +517,8 @@ cache_node_purge( 
struct cache_hash * hash; int count = -1; - hash = cache->c_hash + cache->hash(key, cache->c_hashsize); + hash = cache->c_hash + cache->hash(key, cache->c_hashsize, + cache->c_hashshift); head = &hash->ch_list; pthread_mutex_lock(&hash->ch_mutex); for (pos = head->next, n = pos->next; pos != head; diff --git a/libxfs/init.h b/libxfs/init.h index f0b8cb6..112febb 100644 --- a/libxfs/init.h +++ b/libxfs/init.h @@ -31,7 +31,6 @@ extern char *platform_findrawpath (char *path); extern char *platform_findblockpath (char *path); extern int platform_direct_blockdev (void); extern int platform_align_blockdev (void); -extern int platform_nproc(void); extern unsigned long platform_physmem(void); /* in kilobytes */ extern int platform_has_uuid; diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c index 0219a08..1b691fb 100644 --- a/libxfs/rdwr.c +++ b/libxfs/rdwr.c @@ -203,7 +203,8 @@ xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, int, int, const struct xfs_buf_ops *); int libxfs_writebuf(xfs_buf_t *, int); xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int); -xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, int); +xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, + int, int); xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int, unsigned int); void libxfs_putbuf (xfs_buf_t *); @@ -255,9 +256,10 @@ libxfs_trace_getbuf(const char *func, const char *file, int line, xfs_buf_t * libxfs_trace_getbuf_map(const char *func, const char *file, int line, - struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps) + struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, + int flags) { - xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps); + xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps, flags); __add_trace(bp, func, file, line); return bp; } @@ -311,10 +313,18 @@ struct xfs_bufkey { int nmaps; }; +/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ +#define GOLDEN_RATIO_PRIME 
0x9e37fffffffc0001UL +#define CACHE_LINE_SIZE 64 static unsigned int -libxfs_bhash(cache_key_t key, unsigned int hashsize) +libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift) { - return (((unsigned int)((struct xfs_bufkey *)key)->blkno) >> 5) % hashsize; + uint64_t hashval = ((struct xfs_bufkey *)key)->blkno; + uint64_t tmp; + + tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift); + return tmp % hashsize; } static int @@ -582,11 +592,16 @@ libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len) } struct xfs_buf * -libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps) +libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, + int nmaps, int flags) { struct xfs_bufkey key = {0}; int i; + if (nmaps == 1) + return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len, + flags); + key.buftarg = btp; key.blkno = map[0].bm_bn; for (i = 0; i < nmaps; i++) { @@ -595,7 +610,7 @@ libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps) key.map = map; key.nmaps = nmaps; - return __cache_lookup(&key, 0); + return __cache_lookup(&key, flags); } void @@ -724,27 +739,19 @@ libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags, } int -libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, - struct xfs_buf_map *map, int nmaps, int flags) +libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags) { int fd = libxfs_device_to_fd(btp->dev); int error = 0; char *buf; int i; - ASSERT(BBTOB(len) <= bp->b_bcount); - - ASSERT(bp->b_nmaps == nmaps); - fd = libxfs_device_to_fd(btp->dev); buf = bp->b_addr; for (i = 0; i < bp->b_nmaps; i++) { off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn); int len = BBTOB(bp->b_map[i].bm_len); - ASSERT(bp->b_map[i].bm_bn == map[i].bm_bn); - ASSERT(bp->b_map[i].bm_len == map[i].bm_len); - error = __read_buf(fd, buf, len, offset, flags); if 
(error) { bp->b_error = error; @@ -775,7 +782,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len, flags, ops); - bp = libxfs_getbuf_map(btp, map, nmaps); + bp = libxfs_getbuf_map(btp, map, nmaps, 0); if (!bp) return NULL; @@ -784,7 +791,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) return bp; - error = libxfs_readbufr_map(btp, bp, map, nmaps, flags); + error = libxfs_readbufr_map(btp, bp, flags); if (!error) { bp->b_flags |= LIBXFS_B_UPTODATE; if (bp->b_ops) @@ -891,6 +898,11 @@ libxfs_writebufr(xfs_buf_t *bp) int libxfs_writebuf_int(xfs_buf_t *bp, int flags) { + /* + * Clear any error hanging over from reading the buffer. This prevents + * subsequent reads after this write from seeing stale errors. + */ + bp->b_error = 0; bp->b_flags |= (LIBXFS_B_DIRTY | flags); return 0; } @@ -904,6 +916,11 @@ libxfs_writebuf(xfs_buf_t *bp, int flags) (long long)LIBXFS_BBTOOFF64(bp->b_bn), (long long)bp->b_bn); #endif + /* + * Clear any error hanging over from reading the buffer. This prevents + * subsequent reads after this write from seeing stale errors. 
+ */ + bp->b_error = 0; bp->b_flags |= (LIBXFS_B_DIRTY | flags); libxfs_putbuf(bp); return 0; diff --git a/libxfs/trans.c b/libxfs/trans.c index 6a05673..c443863 100644 --- a/libxfs/trans.c +++ b/libxfs/trans.c @@ -511,7 +511,7 @@ libxfs_trans_get_buf_map( xfs_buf_log_item_t *bip; if (tp == NULL) - return libxfs_getbuf_map(btp, map, nmaps); + return libxfs_getbuf_map(btp, map, nmaps, 0); bp = xfs_trans_buf_item_match(tp, btp, map, nmaps); if (bp != NULL) { @@ -522,7 +522,7 @@ libxfs_trans_get_buf_map( return bp; } - bp = libxfs_getbuf_map(btp, map, nmaps); + bp = libxfs_getbuf_map(btp, map, nmaps, 0); if (bp == NULL) return NULL; #ifdef XACT_DEBUG @@ -694,7 +694,6 @@ inode_item_done( xfs_mount_t *mp; xfs_buf_t *bp; int error; - extern kmem_zone_t *xfs_ili_zone; ip = iip->ili_inode; mp = iip->ili_item.li_mountp; @@ -736,15 +735,9 @@ ili_done: if (iip->ili_lock_flags) { iip->ili_lock_flags = 0; return; - } else { - libxfs_iput(ip, 0); } - - if (ip->i_itemp) - kmem_zone_free(xfs_ili_zone, ip->i_itemp); - else - ASSERT(0); - ip->i_itemp = NULL; + /* free the inode */ + libxfs_iput(ip, 0); } static void diff --git a/libxfs/util.c b/libxfs/util.c index 8109ab3..1b05540 100644 --- a/libxfs/util.c +++ b/libxfs/util.c @@ -730,3 +730,16 @@ cmn_err(int level, char *fmt, ...) fputs("\n", stderr); va_end(ap); } + +/* + * Warnings specifically for verifier errors. Differentiate CRC vs. invalid + * values, and omit the stack trace unless the error level is tuned high. + */ +void +xfs_verifier_error( + struct xfs_buf *bp) +{ + xfs_alert(NULL, "Metadata %s detected at block 0x%llx/0x%x", + bp->b_error == EFSBADCRC ? 
"CRC error" : "corruption", + bp->b_bn, BBTOB(bp->b_length)); +} diff --git a/libxfs/xfs.h b/libxfs/xfs.h index 364fd83..5a21590 100644 --- a/libxfs/xfs.h +++ b/libxfs/xfs.h @@ -449,3 +449,4 @@ int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); void xfs_trans_mod_sb(xfs_trans_t *, uint, long); void xfs_trans_init(struct xfs_mount *); int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); +void xfs_verifier_error(struct xfs_buf *bp); diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c index e4fb1ad..6c82be0 100644 --- a/libxfs/xfs_alloc.c +++ b/libxfs/xfs_alloc.c @@ -452,7 +452,6 @@ xfs_agfl_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - int agfl_ok = 1; /* * There is no verification of non-crc AGFLs because mkfs does not @@ -463,15 +462,13 @@ xfs_agfl_read_verify( if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agfl, agfl_crc)); - - agfl_ok = agfl_ok && xfs_agfl_verify(bp); - - if (!agfl_ok) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_agfl_verify(bp)) xfs_buf_ioerror(bp, EFSCORRUPTED); - } + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -486,16 +483,15 @@ xfs_agfl_write_verify( return; if (!xfs_agfl_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } if (bip) XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agfl, agfl_crc)); + xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); } const struct xfs_buf_ops xfs_agfl_buf_ops = { @@ -2218,19 +2214,17 @@ xfs_agf_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - int agf_ok = 1; - - if (xfs_sb_version_hascrc(&mp->m_sb)) - 
agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agf, agf_crc)); - agf_ok = agf_ok && xfs_agf_verify(mp, bp); - - if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, - XFS_RANDOM_ALLOC_READ_AGF))) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, + XFS_ERRTAG_ALLOC_READ_AGF, + XFS_RANDOM_ALLOC_READ_AGF)) xfs_buf_ioerror(bp, EFSCORRUPTED); - } + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -2241,8 +2235,8 @@ xfs_agf_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; if (!xfs_agf_verify(mp, bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -2252,8 +2246,7 @@ xfs_agf_write_verify( if (bip) XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agf, agf_crc)); + xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); } const struct xfs_buf_ops xfs_agf_buf_ops = { diff --git a/libxfs/xfs_alloc_btree.c b/libxfs/xfs_alloc_btree.c index 282a320..215be7e 100644 --- a/libxfs/xfs_alloc_btree.c +++ b/libxfs/xfs_alloc_btree.c @@ -337,12 +337,14 @@ static void xfs_allocbt_read_verify( struct xfs_buf *bp) { - if (!(xfs_btree_sblock_verify_crc(bp) && - xfs_allocbt_verify(bp))) { - trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); + if (!xfs_btree_sblock_verify_crc(bp)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_allocbt_verify(bp)) xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_verifier_error(bp); } } @@ -352,9 +354,9 @@ xfs_allocbt_write_verify( { if (!xfs_allocbt_verify(bp)) { trace_xfs_btree_corrupt(bp, 
_RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; } xfs_btree_sblock_calc_crc(bp); diff --git a/libxfs/xfs_attr_leaf.c b/libxfs/xfs_attr_leaf.c index fd52397..f7f02ae 100644 --- a/libxfs/xfs_attr_leaf.c +++ b/libxfs/xfs_attr_leaf.c @@ -187,8 +187,8 @@ xfs_attr3_leaf_write_verify( struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; if (!xfs_attr3_leaf_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -198,7 +198,7 @@ xfs_attr3_leaf_write_verify( if (bip) hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF); } /* @@ -213,13 +213,14 @@ xfs_attr3_leaf_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_ATTR3_LEAF_CRC_OFF)) || - !xfs_attr3_leaf_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_attr3_leaf_verify(bp)) xfs_buf_ioerror(bp, EFSCORRUPTED); - } + + if (bp->b_error) + xfs_verifier_error(bp); } const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { diff --git a/libxfs/xfs_attr_remote.c b/libxfs/xfs_attr_remote.c index 59bb12d..5cf5c73 100644 --- a/libxfs/xfs_attr_remote.c +++ b/libxfs/xfs_attr_remote.c @@ -100,7 +100,6 @@ xfs_attr3_rmt_read_verify( struct xfs_mount *mp = bp->b_target->bt_mount; char *ptr; int len; - bool corrupt = false; xfs_daddr_t bno; /* no verification of non-crc buffers */ @@ -115,11 +114,11 @@ xfs_attr3_rmt_read_verify( while (len > 0) { if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF)) { - corrupt = 
true; + xfs_buf_ioerror(bp, EFSBADCRC); break; } if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { - corrupt = true; + xfs_buf_ioerror(bp, EFSCORRUPTED); break; } len -= XFS_LBSIZE(mp); @@ -127,10 +126,9 @@ xfs_attr3_rmt_read_verify( bno += mp->m_bsize; } - if (corrupt) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); - } else + if (bp->b_error) + xfs_verifier_error(bp); + else ASSERT(len == 0); } @@ -155,9 +153,8 @@ xfs_attr3_rmt_write_verify( while (len > 0) { if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { - XFS_CORRUPTION_ERROR(__func__, - XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } if (bip) { diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c index 3136e4f..a4bd69d 100644 --- a/libxfs/xfs_bmap_btree.c +++ b/libxfs/xfs_bmap_btree.c @@ -759,12 +759,14 @@ static void xfs_bmbt_read_verify( struct xfs_buf *bp) { - if (!(xfs_btree_lblock_verify_crc(bp) && - xfs_bmbt_verify(bp))) { - trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); + if (!xfs_btree_lblock_verify_crc(bp)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_bmbt_verify(bp)) xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_verifier_error(bp); } } @@ -773,11 +775,9 @@ xfs_bmbt_write_verify( struct xfs_buf *bp) { if (!xfs_bmbt_verify(bp)) { - xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn); trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } xfs_btree_lblock_calc_crc(bp); diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c index 2dd6fb7..9be4abd 100644 --- a/libxfs/xfs_btree.c +++ b/libxfs/xfs_btree.c @@ -218,8 +218,7 @@ xfs_btree_lblock_calc_crc( return; 
if (bip) block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_BTREE_LBLOCK_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); } bool @@ -227,8 +226,8 @@ xfs_btree_lblock_verify_crc( struct xfs_buf *bp) { if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) - return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_BTREE_LBLOCK_CRC_OFF); + return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); + return true; } @@ -251,8 +250,7 @@ xfs_btree_sblock_calc_crc( return; if (bip) block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_BTREE_SBLOCK_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); } bool @@ -260,8 +258,8 @@ xfs_btree_sblock_verify_crc( struct xfs_buf *bp) { if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) - return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_BTREE_SBLOCK_CRC_OFF); + return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); + return true; } diff --git a/libxfs/xfs_da_btree.c b/libxfs/xfs_da_btree.c index 53414f5..154adb1 100644 --- a/libxfs/xfs_da_btree.c +++ b/libxfs/xfs_da_btree.c @@ -209,8 +209,8 @@ xfs_da3_node_write_verify( struct xfs_da3_node_hdr *hdr3 = bp->b_addr; if (!xfs_da3_node_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -220,7 +220,7 @@ xfs_da3_node_write_verify( if (bip) hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); } /* @@ -233,18 +233,20 @@ static void xfs_da3_node_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_da_blkinfo *info = bp->b_addr; switch (be16_to_cpu(info->magic)) { case XFS_DA3_NODE_MAGIC: - if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 
- XFS_DA3_NODE_CRC_OFF)) + if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { + xfs_buf_ioerror(bp, EFSBADCRC); break; + } /* fall through */ case XFS_DA_NODE_MAGIC: - if (!xfs_da3_node_verify(bp)) + if (!xfs_da3_node_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); break; + } return; case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC: @@ -261,8 +263,7 @@ xfs_da3_node_read_verify( } /* corrupt block */ - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); } const struct xfs_buf_ops xfs_da3_node_buf_ops = { diff --git a/libxfs/xfs_dir2.c b/libxfs/xfs_dir2.c index 96a3c1d..4c8c836 100644 --- a/libxfs/xfs_dir2.c +++ b/libxfs/xfs_dir2.c @@ -20,6 +20,22 @@ struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; +/* + * @mode, if set, indicates that the type field needs to be set up. + * This uses the transformation from file mode to DT_* as defined in linux/fs.h + * for file type specification. This will be propagated into the directory + * structure if appropriate for the given operation and filesystem config. + */ +const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { + [0] = XFS_DIR3_FT_UNKNOWN, + [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, + [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, + [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, +}; /* * ASCII case-insensitive (ie. 
A-Z) support for directories that was diff --git a/libxfs/xfs_dir2_block.c b/libxfs/xfs_dir2_block.c index 1d8f598..cede01f 100644 --- a/libxfs/xfs_dir2_block.c +++ b/libxfs/xfs_dir2_block.c @@ -70,13 +70,14 @@ xfs_dir3_block_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_DIR3_DATA_CRC_OFF)) || - !xfs_dir3_block_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_dir3_block_verify(bp)) xfs_buf_ioerror(bp, EFSCORRUPTED); - } + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -88,8 +89,8 @@ xfs_dir3_block_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_block_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -99,7 +100,7 @@ xfs_dir3_block_write_verify( if (bip) hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); } const struct xfs_buf_ops xfs_dir3_block_buf_ops = { diff --git a/libxfs/xfs_dir2_data.c b/libxfs/xfs_dir2_data.c index 189699f..dc9df4d 100644 --- a/libxfs/xfs_dir2_data.c +++ b/libxfs/xfs_dir2_data.c @@ -208,7 +208,6 @@ static void xfs_dir3_data_reada_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; struct xfs_dir2_data_hdr *hdr = bp->b_addr; switch (hdr->magic) { @@ -222,8 +221,8 @@ xfs_dir3_data_reada_verify( xfs_dir3_data_verify(bp); return; default: - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); break; } } @@ -234,13 +233,14 @@ xfs_dir3_data_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - 
if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_DIR3_DATA_CRC_OFF)) || - !xfs_dir3_data_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_dir3_data_verify(bp)) xfs_buf_ioerror(bp, EFSCORRUPTED); - } + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -252,8 +252,8 @@ xfs_dir3_data_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_data_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -263,7 +263,7 @@ xfs_dir3_data_write_verify( if (bip) hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); } const struct xfs_buf_ops xfs_dir3_data_buf_ops = { diff --git a/libxfs/xfs_dir2_leaf.c b/libxfs/xfs_dir2_leaf.c index 683536e..8e0cbc9 100644 --- a/libxfs/xfs_dir2_leaf.c +++ b/libxfs/xfs_dir2_leaf.c @@ -206,13 +206,14 @@ __read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_DIR3_LEAF_CRC_OFF)) || - !xfs_dir3_leaf_verify(bp, magic)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_dir3_leaf_verify(bp, magic)) xfs_buf_ioerror(bp, EFSCORRUPTED); - } + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -225,8 +226,8 @@ __write_verify( struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_leaf_verify(bp, magic)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); 
+ xfs_verifier_error(bp); return; } @@ -236,7 +237,7 @@ __write_verify( if (bip) hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); } static void diff --git a/libxfs/xfs_dir2_node.c b/libxfs/xfs_dir2_node.c index ced8c58..3256756 100644 --- a/libxfs/xfs_dir2_node.c +++ b/libxfs/xfs_dir2_node.c @@ -98,13 +98,14 @@ xfs_dir3_free_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if ((xfs_sb_version_hascrc(&mp->m_sb) && - !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - XFS_DIR3_FREE_CRC_OFF)) || - !xfs_dir3_free_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_dir3_free_verify(bp)) xfs_buf_ioerror(bp, EFSCORRUPTED); - } + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -116,8 +117,8 @@ xfs_dir3_free_write_verify( struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_dir3_free_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -127,7 +128,7 @@ xfs_dir3_free_write_verify( if (bip) hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF); + xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF); } const struct xfs_buf_ops xfs_dir3_free_buf_ops = { diff --git a/libxfs/xfs_dquot_buf.c b/libxfs/xfs_dquot_buf.c index 6bbb0ff..e089ec8 100644 --- a/libxfs/xfs_dquot_buf.c +++ b/libxfs/xfs_dquot_buf.c @@ -237,10 +237,13 @@ xfs_dquot_buf_read_verify( { struct xfs_mount *mp = bp->b_target->bt_mount; - if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + if (!xfs_dquot_buf_verify_crc(mp, bp)) + 
xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_dquot_buf_verify(mp, bp)) xfs_buf_ioerror(bp, EFSCORRUPTED); - } + + if (bp->b_error) + xfs_verifier_error(bp); } /* @@ -255,8 +258,8 @@ xfs_dquot_buf_write_verify( struct xfs_mount *mp = bp->b_target->bt_mount; if (!xfs_dquot_buf_verify(mp, bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } } diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c index afe1a82..c19d84a 100644 --- a/libxfs/xfs_ialloc.c +++ b/libxfs/xfs_ialloc.c @@ -1551,18 +1551,17 @@ xfs_agi_read_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_target->bt_mount; - int agi_ok = 1; - if (xfs_sb_version_hascrc(&mp->m_sb)) - agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agi, agi_crc)); - agi_ok = agi_ok && xfs_agi_verify(bp); - - if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, - XFS_RANDOM_IALLOC_READ_AGI))) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, + XFS_ERRTAG_IALLOC_READ_AGI, + XFS_RANDOM_IALLOC_READ_AGI)) xfs_buf_ioerror(bp, EFSCORRUPTED); - } + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -1573,8 +1572,8 @@ xfs_agi_write_verify( struct xfs_buf_log_item *bip = bp->b_fspriv; if (!xfs_agi_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -1583,8 +1582,7 @@ xfs_agi_write_verify( if (bip) XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_agi, agi_crc)); + xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); } const struct xfs_buf_ops xfs_agi_buf_ops = { diff --git a/libxfs/xfs_ialloc_btree.c 
b/libxfs/xfs_ialloc_btree.c index 27a5dd9..0a29d73 100644 --- a/libxfs/xfs_ialloc_btree.c +++ b/libxfs/xfs_ialloc_btree.c @@ -224,12 +224,14 @@ static void xfs_inobt_read_verify( struct xfs_buf *bp) { - if (!(xfs_btree_sblock_verify_crc(bp) && - xfs_inobt_verify(bp))) { - trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); + if (!xfs_btree_sblock_verify_crc(bp)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_inobt_verify(bp)) xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) { + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_verifier_error(bp); } } @@ -239,9 +241,9 @@ xfs_inobt_write_verify( { if (!xfs_inobt_verify(bp)) { trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - bp->b_target->bt_mount, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; } xfs_btree_sblock_calc_crc(bp); diff --git a/libxfs/xfs_inode_buf.c b/libxfs/xfs_inode_buf.c index d245d72..de16ed9 100644 --- a/libxfs/xfs_inode_buf.c +++ b/libxfs/xfs_inode_buf.c @@ -88,8 +88,7 @@ xfs_inode_buf_verify( } xfs_buf_ioerror(bp, EFSCORRUPTED); - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, - mp, dip); + xfs_verifier_error(bp); #ifdef DEBUG xfs_alert(mp, "bad inode magic/vsn daddr %lld #%d (magic=%x)", @@ -292,7 +291,7 @@ xfs_dinode_verify( if (!xfs_sb_version_hascrc(&mp->m_sb)) return false; if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, - offsetof(struct xfs_dinode, di_crc))) + XFS_DINODE_CRC_OFF)) return false; if (be64_to_cpu(dip->di_ino) != ino) return false; @@ -313,7 +312,7 @@ xfs_dinode_calc_crc( ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, - offsetof(struct xfs_dinode, di_crc)); + XFS_DINODE_CRC_OFF); dip->di_crc = xfs_end_cksum(crc); } diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c index 48b1a97..7ee4612 100644 --- a/libxfs/xfs_sb.c +++ b/libxfs/xfs_sb.c @@ -258,6 +258,7 @@ 
xfs_mount_validate_sb( sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG || sbp->sb_inodesize != (1 << sbp->sb_inodelog) || + sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || @@ -265,8 +266,7 @@ xfs_mount_validate_sb( sbp->sb_dblocks == 0 || sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { - XFS_CORRUPTION_ERROR("SB sanity check failed", - XFS_ERRLEVEL_LOW, mp, sbp); + xfs_notice(mp, "SB sanity check failed"); return XFS_ERROR(EFSCORRUPTED); } @@ -542,6 +542,11 @@ xfs_sb_verify( * single bit error could clear the feature bit and unused parts of the * superblock are supposed to be zero. Hence a non-null crc field indicates that * we've potentially lost a feature bit and we should check it anyway. + * + * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the + * last field in V4 secondary superblocks. So for secondary superblocks, + * we are more forgiving, and ignore CRC failures if the primary doesn't + * indicate that the fs version is V5. 
*/ static void xfs_sb_read_verify( @@ -560,20 +565,22 @@ xfs_sb_read_verify( XFS_SB_VERSION_5) || dsb->sb_crc != 0)) { - if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize), - offsetof(struct xfs_sb, sb_crc))) { - error = EFSCORRUPTED; - goto out_error; + if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { + /* Only fail bad secondaries on a known V5 filesystem */ + if (bp->b_bn == XFS_SB_DADDR || + xfs_sb_version_hascrc(&mp->m_sb)) { + error = EFSBADCRC; + goto out_error; + } } } error = xfs_sb_verify(bp, true); out_error: if (error) { - if (error != EWRONGFS) - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - mp, bp->b_addr); xfs_buf_ioerror(bp, error); + if (error == EFSCORRUPTED || error == EFSBADCRC) + xfs_verifier_error(bp); } } @@ -589,7 +596,6 @@ xfs_sb_quiet_read_verify( { struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); - if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { /* XFS filesystem, verify noisily! */ xfs_sb_read_verify(bp); @@ -609,9 +615,8 @@ xfs_sb_write_verify( error = xfs_sb_verify(bp, false); if (error) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, - mp, bp->b_addr); xfs_buf_ioerror(bp, error); + xfs_verifier_error(bp); return; } @@ -621,8 +626,7 @@ xfs_sb_write_verify( if (bip) XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_sb, sb_crc)); + xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); } const struct xfs_buf_ops xfs_sb_buf_ops = { diff --git a/libxfs/xfs_symlink_remote.c b/libxfs/xfs_symlink_remote.c index 539db0c..ebf60ac 100644 --- a/libxfs/xfs_symlink_remote.c +++ b/libxfs/xfs_symlink_remote.c @@ -116,12 +116,13 @@ xfs_symlink_read_verify( if (!xfs_sb_version_hascrc(&mp->m_sb)) return; - if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_dsymlink_hdr, sl_crc)) || - !xfs_symlink_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) + 
xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_symlink_verify(bp)) xfs_buf_ioerror(bp, EFSCORRUPTED); - } + + if (bp->b_error) + xfs_verifier_error(bp); } static void @@ -136,8 +137,8 @@ xfs_symlink_write_verify( return; if (!xfs_symlink_verify(bp)) { - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); return; } @@ -145,8 +146,7 @@ xfs_symlink_write_verify( struct xfs_dsymlink_hdr *dsl = bp->b_addr; dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); } - xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), - offsetof(struct xfs_dsymlink_hdr, sl_crc)); + xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF); } const struct xfs_buf_ops xfs_symlink_buf_ops = { diff --git a/logprint/log_misc.c b/logprint/log_misc.c index cf9d59d..52f1e85 100644 --- a/logprint/log_misc.c +++ b/logprint/log_misc.c @@ -810,7 +810,6 @@ xlog_print_trans_icreate( memmove(&icl_buf, *ptr, MIN(sizeof(struct xfs_icreate_log), len)); icl = &icl_buf; - (*i)++; *ptr += len; /* handle complete header only */ @@ -874,7 +873,7 @@ xlog_print_record( int bad_hdr_warn) { xfs_caddr_t buf, ptr; - int read_len, skip; + int read_len, skip, lost_context = 0; int ret, n, i, j, k; if (print_no_print) @@ -995,7 +994,10 @@ xlog_print_record( if (xlog_print_find_tid(be32_to_cpu(op_head->oh_tid), op_head->oh_flags & XLOG_WAS_CONT_TRANS)) { printf(_("Left over region from split log item\n")); + /* Skip this leftover bit */ ptr += be32_to_cpu(op_head->oh_len); + /* We've lost context; don't complain if next one looks bad too */ + lost_context = 1; continue; } @@ -1050,7 +1052,7 @@ xlog_print_record( break; } default: { - if (bad_hdr_warn) { + if (bad_hdr_warn && !lost_context) { fprintf(stderr, _("%s: unknown log operation type (%x)\n"), progname, *(unsigned short *)ptr); @@ -1064,6 +1066,7 @@ xlog_print_record( } skip = 0; ptr += be32_to_cpu(op_head->oh_len); + lost_context = 0; } } /* switch */ } /* else */ diff --git a/man/man8/xfs_io.8 
b/man/man8/xfs_io.8 index 767b50e..0dec0b7 100644 --- a/man/man8/xfs_io.8 +++ b/man/man8/xfs_io.8 @@ -4,7 +4,7 @@ xfs_io \- debug the I/O path of an XFS filesystem .SH SYNOPSIS .B xfs_io [ -.B \-adfmrRstx +.B \-adfmrRstxT ] [ .B \-c .I cmd @@ -88,7 +88,7 @@ command for more details on any command. Display a list of all open files and (optionally) switch to an alternate current open file. .TP -.BI "open [[ \-acdfrstR ] " path " ]" +.BI "open [[ \-acdfrstRT ] " path " ]" Closes the current file, and opens the file specified by .I path instead. Without any arguments, displays statistics about the current @@ -119,6 +119,14 @@ truncates on open (O_TRUNC). .B \-n opens in non-blocking mode if possible (O_NONBLOCK). .TP +.B \-T +create a temporary file not linked into the filesystem namespace +(O_TMPFILE). The pathname passed must refer to a directory which +is treated as virtual parent for the newly created invisible file. +Can not be used together with the +.B \-r +option. +.TP .B \-R marks the file as a realtime XFS file after opening it, if it is not already marked as such. @@ -380,12 +388,23 @@ will set the FALLOC_FL_KEEP_SIZE flag as described in .PD .RE .TP +.BI fcollapse " offset length" +Call fallocate with FALLOC_FL_COLLAPSE_RANGE flag as described in the +.BR fallocate (2) +manual page to de-allocate blocks and eliminate the hole created in this process +by shifting data blocks into the hole. +.TP .BI fpunch " offset length" Punches (de-allocates) blocks in the file by calling fallocate with the FALLOC_FL_PUNCH_HOLE flag as described in the .BR fallocate (2) manual page. .TP +.BI fzero " offset length" +Call fallocate with FALLOC_FL_ZERO_RANGE flag as described in the +.BR fallocate (2) +manual page to allocate and zero blocks within the range. +.TP .BI truncate " offset" Truncates the current file at the given offset using .BR ftruncate (2). 
diff --git a/man/man8/xfs_metadump.8 b/man/man8/xfs_metadump.8 index 4fa1b1c..077fff5 100644 --- a/man/man8/xfs_metadump.8 +++ b/man/man8/xfs_metadump.8 @@ -4,7 +4,7 @@ xfs_metadump \- copy XFS filesystem metadata to a file .SH SYNOPSIS .B xfs_metadump [ -.B \-efgow +.B \-efFgow ] [ .B \-m .I max_extents @@ -86,6 +86,11 @@ file option). This can also happen if an image copy of a filesystem has been made into an ordinary file with .BR xfs_copy (8). .TP +.B \-F +Specifies that we want to continue even if the superblock magic is not correct. +If the source is truly not an XFS filesystem, the resulting image will be useless, +and xfs_metadump may crash. +.TP .B \-g Shows dump progress. This is sent to stdout if the .I target diff --git a/man/man8/xfs_repair.8 b/man/man8/xfs_repair.8 index ed20fb7..b7c2d8c 100644 --- a/man/man8/xfs_repair.8 +++ b/man/man8/xfs_repair.8 @@ -144,7 +144,7 @@ reduce repair times on concat based filesystems. .BI force_geometry Check the filesystem even if geometry information could not be validated. Geometry information can not be validated if only a single allocation -group and exist and thus we do not have a backup superblock available, or +group exists and thus we do not have a backup superblock available, or if there are two allocation groups and the two superblocks do not agree on the filesystem geometry. Only use this option if you validated the geometry yourself and know what you are doing. If In doubt run diff --git a/mkfs/proto.c b/mkfs/proto.c index 4cc0df6..4d3680d 100644 --- a/mkfs/proto.c +++ b/mkfs/proto.c @@ -438,6 +438,7 @@ parseproto( creds.cr_gid = (int)getnum(pp); xname.name = (uchar_t *)name; xname.len = name ? 
strlen(name) : 0; + xname.type = 0; tp = libxfs_trans_alloc(mp, 0); flags = XFS_ILOG_CORE; xfs_bmap_init(&flist, &first); @@ -453,6 +454,7 @@ parseproto( if (buf) free(buf); libxfs_trans_ijoin(tp, pip, 0); + xname.type = XFS_DIR3_FT_REG_FILE; newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); libxfs_trans_ihold(tp, pip); break; @@ -469,6 +471,7 @@ parseproto( libxfs_trans_ijoin(tp, pip, 0); + xname.type = XFS_DIR3_FT_REG_FILE; newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); libxfs_trans_ihold(tp, pip); libxfs_trans_log_inode(tp, ip, flags); @@ -490,6 +493,7 @@ parseproto( fail(_("Inode allocation failed"), error); } libxfs_trans_ijoin(tp, pip, 0); + xname.type = XFS_DIR3_FT_BLKDEV; newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); libxfs_trans_ihold(tp, pip); flags |= XFS_ILOG_DEV; @@ -504,6 +508,7 @@ parseproto( if (error) fail(_("Inode allocation failed"), error); libxfs_trans_ijoin(tp, pip, 0); + xname.type = XFS_DIR3_FT_CHRDEV; newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); libxfs_trans_ihold(tp, pip); flags |= XFS_ILOG_DEV; @@ -516,6 +521,7 @@ parseproto( if (error) fail(_("Inode allocation failed"), error); libxfs_trans_ijoin(tp, pip, 0); + xname.type = XFS_DIR3_FT_FIFO; newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); libxfs_trans_ihold(tp, pip); break; @@ -529,6 +535,7 @@ parseproto( fail(_("Inode allocation failed"), error); flags |= newfile(tp, ip, &flist, &first, 1, 1, buf, len); libxfs_trans_ijoin(tp, pip, 0); + xname.type = XFS_DIR3_FT_SYMLINK; newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); libxfs_trans_ihold(tp, pip); break; @@ -546,6 +553,7 @@ parseproto( isroot = 1; } else { libxfs_trans_ijoin(tp, pip, 0); + xname.type = XFS_DIR3_FT_DIR; newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); pip->i_d.di_nlink++; diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index d82128c..f7cf394 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -2366,32 +2366,40 @@ _("size %s specified for log 
subvolume is too large, maximum is %lld blocks\n"), } else if (!loginternal && !xi.logdev) { logblocks = 0; } else if (loginternal && !logsize) { - /* - * With a 2GB max log size, default to maximum size - * at 4TB. This keeps the same ratio from the older - * max log size of 128M at 256GB fs size. IOWs, - * the ratio of fs size to log size is 2048:1. - */ - logblocks = (dblocks << blocklog) / 2048; - logblocks = logblocks >> blocklog; - logblocks = MAX(min_logblocks, logblocks); - /* - * If the default log size doesn't fit in the AG size, use the - * minimum log size instead. This ensures small filesystems - * don't use excessive amounts of space for the log. - */ - if (min_logblocks * XFS_DFL_LOG_FACTOR >= agsize) { + if (dblocks < GIGABYTES(1, blocklog)) { + /* tiny filesystems get minimum sized logs. */ logblocks = min_logblocks; + } else if (dblocks < GIGABYTES(16, blocklog)) { + + /* + * For small filesystems, we want to use the + * XFS_MIN_LOG_BYTES for filesystems smaller than 16G if + * at all possible, ramping up to 128MB at 256GB. + */ + logblocks = MIN(XFS_MIN_LOG_BYTES >> blocklog, + min_logblocks * XFS_DFL_LOG_FACTOR); } else { - logblocks = MAX(logblocks, - MAX(XFS_DFL_LOG_SIZE, - min_logblocks * XFS_DFL_LOG_FACTOR)); + /* + * With a 2GB max log size, default to maximum size + * at 4TB. This keeps the same ratio from the older + * max log size of 128M at 256GB fs size. IOWs, + * the ratio of fs size to log size is 2048:1. 
+ */ + logblocks = (dblocks << blocklog) / 2048; + logblocks = logblocks >> blocklog; + logblocks = MAX(min_logblocks, logblocks); } + + /* make sure the log fits wholly within an AG */ + if (logblocks >= agsize) + logblocks = min_logblocks; + + /* and now clamp the size to the maximum supported size */ logblocks = MIN(logblocks, XFS_MAX_LOG_BLOCKS); - if ((logblocks << blocklog) > XFS_MAX_LOG_BYTES) { + if ((logblocks << blocklog) > XFS_MAX_LOG_BYTES) logblocks = XFS_MAX_LOG_BYTES >> blocklog; - } + } validate_log_size(logblocks, blocklog, min_logblocks); diff --git a/repair/agheader.c b/repair/agheader.c index 53e47b6..fc5dac9 100644 --- a/repair/agheader.c +++ b/repair/agheader.c @@ -472,7 +472,7 @@ verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb, int status = XR_OK; int status_sb = XR_OK; - status = verify_sb(sb, (i == 0)); + status = verify_sb(sbuf->b_addr, sb, (i == 0)); if (status != XR_OK) { do_warn(_("bad on-disk superblock %d - %s\n"), diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c index d3c2236..afb26e0 100644 --- a/repair/dino_chunks.c +++ b/repair/dino_chunks.c @@ -141,7 +141,7 @@ verify_inode_chunk(xfs_mount_t *mp, if (check_aginode_block(mp, agno, agino) == 0) return 0; - pthread_mutex_lock(&ag_locks[agno]); + pthread_mutex_lock(&ag_locks[agno].lock); state = get_bmap(agno, agbno); switch (state) { @@ -166,7 +166,7 @@ verify_inode_chunk(xfs_mount_t *mp, _("inode block %d/%d multiply claimed, (state %d)\n"), agno, agbno, state); set_bmap(agno, agbno, XR_E_MULT); - pthread_mutex_unlock(&ag_locks[agno]); + pthread_mutex_unlock(&ag_locks[agno].lock); return(0); default: do_warn( @@ -176,7 +176,7 @@ verify_inode_chunk(xfs_mount_t *mp, break; } - pthread_mutex_unlock(&ag_locks[agno]); + pthread_mutex_unlock(&ag_locks[agno].lock); start_agino = XFS_OFFBNO_TO_AGINO(mp, agbno, 0); *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino); @@ -424,7 +424,7 @@ verify_inode_chunk(xfs_mount_t *mp, * user data -- we're probably here as a 
result of a directory * entry or an iunlinked pointer */ - pthread_mutex_lock(&ag_locks[agno]); + pthread_mutex_lock(&ag_locks[agno].lock); for (cur_agbno = chunk_start_agbno; cur_agbno < chunk_stop_agbno; cur_agbno += blen) { @@ -438,7 +438,7 @@ verify_inode_chunk(xfs_mount_t *mp, _("inode block %d/%d multiply claimed, (state %d)\n"), agno, cur_agbno, state); set_bmap_ext(agno, cur_agbno, blen, XR_E_MULT); - pthread_mutex_unlock(&ag_locks[agno]); + pthread_mutex_unlock(&ag_locks[agno].lock); return 0; case XR_E_INO: do_error( @@ -449,7 +449,7 @@ verify_inode_chunk(xfs_mount_t *mp, break; } } - pthread_mutex_unlock(&ag_locks[agno]); + pthread_mutex_unlock(&ag_locks[agno].lock); /* * ok, chunk is good. put the record into the tree if required, @@ -472,7 +472,7 @@ verify_inode_chunk(xfs_mount_t *mp, set_inode_used(irec_p, agino - start_agino); - pthread_mutex_lock(&ag_locks[agno]); + pthread_mutex_lock(&ag_locks[agno].lock); for (cur_agbno = chunk_start_agbno; cur_agbno < chunk_stop_agbno; @@ -505,7 +505,7 @@ verify_inode_chunk(xfs_mount_t *mp, break; } } - pthread_mutex_unlock(&ag_locks[agno]); + pthread_mutex_unlock(&ag_locks[agno].lock); return(ino_cnt); } @@ -736,7 +736,7 @@ process_inode_chunk( /* * mark block as an inode block in the incore bitmap */ - pthread_mutex_lock(&ag_locks[agno]); + pthread_mutex_lock(&ag_locks[agno].lock); state = get_bmap(agno, agbno); switch (state) { case XR_E_INO: /* already marked */ @@ -755,7 +755,7 @@ process_inode_chunk( XFS_AGB_TO_FSB(mp, agno, agbno), state); break; } - pthread_mutex_unlock(&ag_locks[agno]); + pthread_mutex_unlock(&ag_locks[agno].lock); for (;;) { /* @@ -788,6 +788,8 @@ process_inode_chunk( * we do now, this is where to start. */ if (is_used) { + __uint16_t di_mode; + if (is_inode_free(ino_rec, irec_offset)) { if (verbose || no_modify) { do_warn( @@ -803,6 +805,15 @@ process_inode_chunk( set_inode_used(ino_rec, irec_offset); /* + * store the on-disk file type for comparing in + * phase 6. 
+ */ + di_mode = be16_to_cpu(dino->di_mode); + di_mode = (di_mode & S_IFMT) >> S_SHIFT; + set_inode_ftype(ino_rec, irec_offset, + xfs_mode_to_ftype[di_mode]); + + /* * store on-disk nlink count for comparing in phase 7 */ set_inode_disk_nlinks(ino_rec, irec_offset, @@ -914,7 +925,7 @@ process_inode_chunk( ibuf_offset = 0; agbno++; - pthread_mutex_lock(&ag_locks[agno]); + pthread_mutex_lock(&ag_locks[agno].lock); state = get_bmap(agno, agbno); switch (state) { case XR_E_INO: /* already marked */ @@ -935,7 +946,7 @@ process_inode_chunk( XFS_AGB_TO_FSB(mp, agno, agbno), state); break; } - pthread_mutex_unlock(&ag_locks[agno]); + pthread_mutex_unlock(&ag_locks[agno].lock); } else if (irec_offset == XFS_INODES_PER_CHUNK) { /* diff --git a/repair/dinode.c b/repair/dinode.c index 7469fc8..48f17ac 100644 --- a/repair/dinode.c +++ b/repair/dinode.c @@ -32,6 +32,37 @@ #include "threads.h" /* + * gettext lookups for translations of strings use mutexes internally to + * the library. Hence when we come through here doing parallel scans in + * multiple AGs, then all do concurrent text conversions and serialise + * on the translation string lookups. Let's avoid doing repeated lookups + * by making them static variables and only assigning the translation + * once. 
+ */ +static char *forkname_data; +static char *forkname_attr; +static char *ftype_real_time; +static char *ftype_regular; + +void +dinode_bmbt_translation_init(void) +{ + forkname_data = _("data"); + forkname_attr = _("attr"); + ftype_real_time = _("real-time"); + ftype_regular = _("regular"); +} + +char * +get_forkname(int whichfork) +{ + + if (whichfork == XFS_DATA_FORK) + return forkname_data; + return forkname_attr; +} + +/* * inode clearing routines */ @@ -542,7 +573,7 @@ process_bmbt_reclist_int( xfs_dfiloff_t op = 0; /* prev offset */ xfs_dfsbno_t b; char *ftype; - char *forkname; + char *forkname = get_forkname(whichfork); int i; int state; xfs_agnumber_t agno; @@ -552,15 +583,10 @@ process_bmbt_reclist_int( xfs_agnumber_t locked_agno = -1; int error = 1; - if (whichfork == XFS_DATA_FORK) - forkname = _("data"); - else - forkname = _("attr"); - if (type == XR_INO_RTDATA) - ftype = _("real-time"); + ftype = ftype_real_time; else - ftype = _("regular"); + ftype = ftype_regular; for (i = 0; i < *numrecs; i++) { libxfs_bmbt_disk_get_all(rp + i, &irec); @@ -651,9 +677,10 @@ _("inode %" PRIu64 " - extent offset too large - start %" PRIu64 ", " } if (blkmapp && *blkmapp) { - error = blkmap_set_ext(blkmapp, irec.br_startoff, + int error2; + error2 = blkmap_set_ext(blkmapp, irec.br_startoff, irec.br_startblock, irec.br_blockcount); - if (error) { + if (error2) { /* * we don't want to clear the inode due to an * internal bmap tracking error, but if we've @@ -665,7 +692,7 @@ _("inode %" PRIu64 " - extent offset too large - start %" PRIu64 ", " do_abort( _("Fatal error: inode %" PRIu64 " - blkmap_set_ext(): %s\n" "\t%s fork, off - %" PRIu64 ", start - %" PRIu64 ", cnt %" PRIu64 "\n"), - ino, strerror(error), forkname, + ino, strerror(error2), forkname, irec.br_startoff, irec.br_startblock, irec.br_blockcount); } @@ -680,8 +707,8 @@ _("Fatal error: inode %" PRIu64 " - blkmap_set_ext(): %s\n" ebno = agbno + irec.br_blockcount; if (agno != locked_agno) { if (locked_agno 
!= -1) - pthread_mutex_unlock(&ag_locks[locked_agno]); - pthread_mutex_lock(&ag_locks[agno]); + pthread_mutex_unlock(&ag_locks[locked_agno].lock); + pthread_mutex_lock(&ag_locks[agno].lock); locked_agno = agno; } @@ -750,7 +777,7 @@ _("illegal state %d in block map %" PRIu64 "\n"), error = 0; done: if (locked_agno != -1) - pthread_mutex_unlock(&ag_locks[locked_agno]); + pthread_mutex_unlock(&ag_locks[locked_agno].lock); if (i != *numrecs) { ASSERT(i < *numrecs); @@ -1109,7 +1136,7 @@ process_btinode( xfs_ino_t lino; xfs_bmbt_ptr_t *pp; xfs_bmbt_key_t *pkey; - char *forkname; + char *forkname = get_forkname(whichfork); int i; int level; int numrecs; @@ -1121,11 +1148,6 @@ process_btinode( *tot = 0; *nex = 0; - if (whichfork == XFS_DATA_FORK) - forkname = _("data"); - else - forkname = _("attr"); - magic = xfs_sb_version_hascrc(&mp->m_sb) ? XFS_BMAP_CRC_MAGIC : XFS_BMAP_MAGIC; diff --git a/repair/dinode.h b/repair/dinode.h index d9197c1..5ee51ca 100644 --- a/repair/dinode.h +++ b/repair/dinode.h @@ -18,9 +18,8 @@ #ifndef _XR_DINODE_H #define _XR_DINODE_H -#include "prefetch.h" - struct blkmap; +struct prefetch_args; int verify_agbno(xfs_mount_t *mp, @@ -103,12 +102,12 @@ int process_uncertain_aginodes(xfs_mount_t *mp, xfs_agnumber_t agno); void -process_aginodes(xfs_mount_t *mp, - prefetch_args_t *pf_args, - xfs_agnumber_t agno, - int check_dirs, - int check_dups, - int extra_attr_check); +process_aginodes(xfs_mount_t *mp, + struct prefetch_args *pf_args, + xfs_agnumber_t agno, + int check_dirs, + int check_dups, + int extra_attr_check); void check_uncertain_aginodes(xfs_mount_t *mp, @@ -127,4 +126,7 @@ get_bmapi(xfs_mount_t *mp, xfs_dfiloff_t bno, int whichfork ); +void dinode_bmbt_translation_init(void); +char * get_forkname(int whichfork); + #endif /* _XR_DINODE_H */ diff --git a/repair/dir2.c b/repair/dir2.c index 3aabcaa..06dc000 100644 --- a/repair/dir2.c +++ b/repair/dir2.c @@ -552,7 +552,7 @@ _("can't read block %u for directory inode %" PRIu64 "\n"), newnode 
= bp->b_addr; btree = xfs_da3_node_tree_p(newnode); - xfs_da3_node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(&nodehdr, newnode); /* * verify magic number and back pointer, sanity-check * entry count, verify level diff --git a/repair/globals.h b/repair/globals.h index aef8b79..f6e0a22 100644 --- a/repair/globals.h +++ b/repair/globals.h @@ -49,7 +49,8 @@ #define XR_BAD_SB_UNIT 17 /* bad stripe unit */ #define XR_BAD_SB_WIDTH 18 /* bad stripe width */ #define XR_BAD_SVN 19 /* bad shared version number */ -#define XR_BAD_ERR_CODE 20 /* Bad error code */ +#define XR_BAD_CRC 20 /* Bad CRC */ +#define XR_BAD_ERR_CODE 21 /* Bad error code */ /* XFS filesystem (il)legal values */ @@ -186,7 +187,10 @@ EXTERN xfs_extlen_t sb_inoalignmt; EXTERN __uint32_t sb_unit; EXTERN __uint32_t sb_width; -EXTERN pthread_mutex_t *ag_locks; +struct aglock { + pthread_mutex_t lock __attribute__((__aligned__(64))); +}; +EXTERN struct aglock *ag_locks; EXTERN int report_interval; EXTERN __uint64_t *prog_rpt_done; diff --git a/repair/incore.c b/repair/incore.c index 3590464..a8d497e 100644 --- a/repair/incore.c +++ b/repair/incore.c @@ -294,13 +294,13 @@ init_bmaps(xfs_mount_t *mp) if (!ag_bmap) do_error(_("couldn't allocate block map btree roots\n")); - ag_locks = calloc(mp->m_sb.sb_agcount, sizeof(pthread_mutex_t)); + ag_locks = calloc(mp->m_sb.sb_agcount, sizeof(struct aglock)); if (!ag_locks) do_error(_("couldn't allocate block map locks\n")); for (i = 0; i < mp->m_sb.sb_agcount; i++) { btree_init(&ag_bmap[i]); - pthread_mutex_init(&ag_locks[i], NULL); + pthread_mutex_init(&ag_locks[i].lock, NULL); } init_rt_bmap(mp); diff --git a/repair/incore.h b/repair/incore.h index 38caa6d..5419884 100644 --- a/repair/incore.h +++ b/repair/incore.h @@ -293,6 +293,7 @@ typedef struct ino_tree_node { ino_ex_data_t *ex_data; /* phases 6,7 */ parent_list_t *plist; /* phases 2-5 */ } ino_un; + __uint8_t *ftypes; /* phases 3,6 */ } ino_tree_node_t; #define INOS_PER_IREC (sizeof(__uint64_t) 
* NBBY) @@ -359,7 +360,8 @@ ino_tree_node_t *find_uncertain_inode_rec(xfs_agnumber_t agno, xfs_agino_t ino); void add_inode_uncertain(xfs_mount_t *mp, xfs_ino_t ino, int free); -void add_aginode_uncertain(xfs_agnumber_t agno, +void add_aginode_uncertain(struct xfs_mount *mp, + xfs_agnumber_t agno, xfs_agino_t agino, int free); void get_uncertain_inode_rec(struct xfs_mount *mp, xfs_agnumber_t agno, @@ -476,6 +478,29 @@ static inline void add_inode_reached(struct ino_tree_node *irec, int offset) } /* + * get/set inode filetype. Only used if the superblock feature bit is set + * which allocates irec->ftypes. + */ +static inline void +set_inode_ftype(struct ino_tree_node *irec, + int ino_offset, + __uint8_t ftype) +{ + if (irec->ftypes) + irec->ftypes[ino_offset] = ftype; +} + +static inline __uint8_t +get_inode_ftype( + struct ino_tree_node *irec, + int ino_offset) +{ + if (!irec->ftypes) + return XFS_DIR3_FT_UNKNOWN; + return irec->ftypes[ino_offset]; +} + +/* * set/get inode number of parent -- works for directory inodes only */ void set_inode_parent(ino_tree_node_t *irec, int ino_offset, diff --git a/repair/incore_ino.c b/repair/incore_ino.c index 735737a..9502648 100644 --- a/repair/incore_ino.c +++ b/repair/incore_ino.c @@ -211,6 +211,21 @@ __uint32_t get_inode_disk_nlinks(struct ino_tree_node *irec, int ino_offset) return 0; } +static __uint8_t * +alloc_ftypes_array( + struct xfs_mount *mp) +{ + __uint8_t *ptr; + + if (!xfs_sb_version_hasftype(&mp->m_sb)) + return NULL; + + ptr = calloc(XFS_INODES_PER_CHUNK, sizeof(*ptr)); + if (!ptr) + do_error(_("could not allocate ftypes array\n")); + return ptr; +} + /* * Next is the uncertain inode list -- a sorted (in ascending order) * list of inode records sorted on the starting inode number. 
There @@ -226,6 +241,7 @@ __uint32_t get_inode_disk_nlinks(struct ino_tree_node *irec, int ino_offset) */ static struct ino_tree_node * alloc_ino_node( + struct xfs_mount *mp, xfs_agino_t starting_ino) { struct ino_tree_node *irec; @@ -245,6 +261,7 @@ alloc_ino_node( irec->ino_un.ex_data = NULL; irec->nlink_size = sizeof(__uint8_t); irec->disk_nlinks.un8 = alloc_nlink_array(irec->nlink_size); + irec->ftypes = alloc_ftypes_array(mp); return irec; } @@ -285,6 +302,7 @@ free_ino_tree_node( } + free(irec->ftypes); free(irec); } @@ -303,7 +321,11 @@ static ino_tree_node_t **last_rec; * free is set to 1 if the inode is thought to be free, 0 if used */ void -add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) +add_aginode_uncertain( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agino_t ino, + int free) { ino_tree_node_t *ino_rec; xfs_agino_t s_ino; @@ -334,7 +356,7 @@ add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) ino_rec = (ino_tree_node_t *) avl_findrange(inode_uncertain_tree_ptrs[agno], s_ino); if (!ino_rec) { - ino_rec = alloc_ino_node(s_ino); + ino_rec = alloc_ino_node(mp, s_ino); if (!avl_insert(inode_uncertain_tree_ptrs[agno], &ino_rec->avl_node)) @@ -360,7 +382,7 @@ add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) void add_inode_uncertain(xfs_mount_t *mp, xfs_ino_t ino, int free) { - add_aginode_uncertain(XFS_INO_TO_AGNO(mp, ino), + add_aginode_uncertain(mp, XFS_INO_TO_AGNO(mp, ino), XFS_INO_TO_AGINO(mp, ino), free); } @@ -432,7 +454,7 @@ add_inode( { struct ino_tree_node *irec; - irec = alloc_ino_node(agino); + irec = alloc_ino_node(mp, agino); if (!avl_insert(inode_tree_ptrs[agno], &irec->avl_node)) do_warn(_("add_inode - duplicate inode range\n")); return irec; diff --git a/repair/init.c b/repair/init.c index c3f380b..d0940aa 100644 --- a/repair/init.c +++ b/repair/init.c @@ -97,8 +97,17 @@ xfs_init(libxfs_init_t *args) else args->isreadonly = LIBXFS_EXCLUSIVELY; - if (!libxfs_init(args)) + if 
(!libxfs_init(args)) { + /* would -d be an option? */ + if (!no_modify && !dangerously) { + args->isreadonly = (LIBXFS_ISINACTIVE | + LIBXFS_DANGEROUSLY); + if (libxfs_init(args)) + fprintf(stderr, +_("Unmount or use the dangerous (-d) option to repair a read-only mounted filesystem\n")); + } do_error(_("couldn't initialize XFS library\n")); + } ts_create(); increase_rlimit(); diff --git a/repair/phase1.c b/repair/phase1.c index 62de211..ec75ada 100644 --- a/repair/phase1.c +++ b/repair/phase1.c @@ -70,13 +70,14 @@ phase1(xfs_mount_t *mp) ag_bp = alloc_ag_buf(MAX_SECTSIZE); sb = (xfs_sb_t *) ag_bp; - if (get_sb(sb, 0LL, MAX_SECTSIZE, 0) == XR_EOF) + rval = get_sb(sb, 0LL, MAX_SECTSIZE, 0); + if (rval == XR_EOF) do_error(_("error reading primary superblock\n")); /* * is this really an sb, verify internal consistency */ - if ((rval = verify_sb(sb, 1)) != XR_OK) { + if (rval != XR_OK) { do_warn(_("bad primary superblock - %s !!!\n"), err_string(rval)); if (!find_secondary_sb(sb)) diff --git a/repair/phase3.c b/repair/phase3.c index 3e43938..213d368 100644 --- a/repair/phase3.c +++ b/repair/phase3.c @@ -17,6 +17,8 @@ */ #include +#include "threads.h" +#include "prefetch.h" #include "avl.h" #include "globals.h" #include "agheader.h" @@ -24,9 +26,7 @@ #include "protos.h" #include "err_protos.h" #include "dinode.h" -#include "threads.h" #include "progress.h" -#include "prefetch.h" static void process_agi_unlinked( @@ -82,41 +82,7 @@ static void process_ags( xfs_mount_t *mp) { - int i, j; - xfs_agnumber_t agno; - work_queue_t *queues; - prefetch_args_t *pf_args[2]; - - queues = malloc(thread_count * sizeof(work_queue_t)); - - if (ag_stride) { - /* - * create one worker thread for each segment of the volume - */ - for (i = 0, agno = 0; i < thread_count; i++) { - create_work_queue(&queues[i], mp, 1); - pf_args[0] = NULL; - for (j = 0; j < ag_stride && agno < mp->m_sb.sb_agcount; - j++, agno++) { - pf_args[0] = start_inode_prefetch(agno, 0, pf_args[0]); - 
queue_work(&queues[i], process_ag_func, agno, pf_args[0]); - } - } - /* - * wait for workers to complete - */ - for (i = 0; i < thread_count; i++) - destroy_work_queue(&queues[i]); - } else { - queues[0].mp = mp; - pf_args[0] = start_inode_prefetch(0, 0, NULL); - for (i = 0; i < mp->m_sb.sb_agcount; i++) { - pf_args[(~i) & 1] = start_inode_prefetch(i + 1, 0, - pf_args[i & 1]); - process_ag_func(&queues[0], i, pf_args[i & 1]); - } - } - free(queues); + do_inode_prefetch(mp, ag_stride, process_ag_func, false, false); } void diff --git a/repair/phase4.c b/repair/phase4.c index a822aaa..189eeb9 100644 --- a/repair/phase4.c +++ b/repair/phase4.c @@ -17,6 +17,8 @@ */ #include +#include "threads.h" +#include "prefetch.h" #include "avl.h" #include "globals.h" #include "agheader.h" @@ -27,9 +29,7 @@ #include "bmap.h" #include "versions.h" #include "dir2.h" -#include "threads.h" #include "progress.h" -#include "prefetch.h" /* @@ -150,49 +150,7 @@ static void process_ags( xfs_mount_t *mp) { - int i, j; - xfs_agnumber_t agno; - work_queue_t *queues; - prefetch_args_t *pf_args[2]; - - queues = malloc(thread_count * sizeof(work_queue_t)); - - if (!libxfs_bcache_overflowed()) { - queues[0].mp = mp; - create_work_queue(&queues[0], mp, libxfs_nproc()); - for (i = 0; i < mp->m_sb.sb_agcount; i++) - queue_work(&queues[0], process_ag_func, i, NULL); - destroy_work_queue(&queues[0]); - } else { - if (ag_stride) { - /* - * create one worker thread for each segment of the volume - */ - for (i = 0, agno = 0; i < thread_count; i++) { - create_work_queue(&queues[i], mp, 1); - pf_args[0] = NULL; - for (j = 0; j < ag_stride && agno < mp->m_sb.sb_agcount; - j++, agno++) { - pf_args[0] = start_inode_prefetch(agno, 0, pf_args[0]); - queue_work(&queues[i], process_ag_func, agno, pf_args[0]); - } - } - /* - * wait for workers to complete - */ - for (i = 0; i < thread_count; i++) - destroy_work_queue(&queues[i]); - } else { - queues[0].mp = mp; - pf_args[0] = start_inode_prefetch(0, 0, NULL); - for 
(i = 0; i < mp->m_sb.sb_agcount; i++) { - pf_args[(~i) & 1] = start_inode_prefetch(i + 1, - 0, pf_args[i & 1]); - process_ag_func(&queues[0], i, pf_args[i & 1]); - } - } - } - free(queues); + do_inode_prefetch(mp, ag_stride, process_ag_func, true, false); } diff --git a/repair/phase6.c b/repair/phase6.c index d2d4a44..446f3ee 100644 --- a/repair/phase6.c +++ b/repair/phase6.c @@ -17,6 +17,8 @@ */ #include +#include "threads.h" +#include "prefetch.h" #include "avl.h" #include "globals.h" #include "agheader.h" @@ -25,9 +27,7 @@ #include "protos.h" #include "err_protos.h" #include "dinode.h" -#include "prefetch.h" #include "progress.h" -#include "threads.h" #include "versions.h" static struct cred zerocr; @@ -43,13 +43,13 @@ static struct xfs_name xfs_name_dot = {(unsigned char *)".", * entries are updated. These must be rebuilt after the initial pass */ typedef struct dotdot_update { - struct dotdot_update *next; + struct list_head list; ino_tree_node_t *irec; xfs_agnumber_t agno; int ino_offset; } dotdot_update_t; -static dotdot_update_t *dotdot_update_list; +static LIST_HEAD(dotdot_update_list); static int dotdot_update; static void @@ -64,12 +64,12 @@ add_dotdot_update( do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"), sizeof(dotdot_update_t)); - dir->next = dotdot_update_list; + INIT_LIST_HEAD(&dir->list); dir->irec = irec; dir->agno = agno; dir->ino_offset = ino_offset; - dotdot_update_list = dir; + list_add(&dir->list, &dotdot_update_list); } /* @@ -134,7 +134,8 @@ dir_hash_add( __uint32_t addr, xfs_ino_t inum, int namelen, - unsigned char *name) + unsigned char *name, + __uint8_t ftype) { xfs_dahash_t hash = 0; int byaddr; @@ -148,6 +149,7 @@ dir_hash_add( xname.name = name; xname.len = namelen; + xname.type = ftype; junk = name[0] == '/'; byaddr = DIR_HASH_FUNC(hashtab, addr); @@ -312,6 +314,23 @@ dir_hash_see( return DIR_HASH_CK_NODATA; } +static void +dir_hash_update_ftype( + dir_hash_tab_t *hashtab, + xfs_dir2_dataptr_t addr, + __uint8_t ftype) 
+{ + int i; + dir_hash_ent_t *p; + + i = DIR_HASH_FUNC(hashtab, addr); + for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) { + if (p->address != addr) + continue; + p->name.type = ftype; + } +} + /* * checks to make sure leafs match a data entry, and that the stale * count is valid. @@ -1685,11 +1704,12 @@ longform_dir2_entry_check_data( if (!orphanage_ino) orphanage_ino = inum; } + /* * check for duplicate names in directory. */ if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen, - dep->name)) { + dep->name, xfs_dir3_dirent_get_ftype(mp, dep))) { nbad++; if (entry_junked( _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), @@ -1763,6 +1783,35 @@ longform_dir2_entry_check_data( */ if (no_modify && verify_inum(mp, inum)) continue; + + /* validate ftype field if supported */ + if (xfs_sb_version_hasftype(&mp->m_sb)) { + __uint8_t dir_ftype; + __uint8_t ino_ftype; + + dir_ftype = xfs_dir3_dirent_get_ftype(mp, dep); + ino_ftype = get_inode_ftype(irec, ino_offset); + + if (dir_ftype != ino_ftype) { + if (no_modify) { + do_warn( + _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), + dir_ftype, ino_ftype, + ip->i_ino, inum); + } else { + do_warn( + _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), + dir_ftype, ino_ftype, + ip->i_ino, inum); + xfs_dir3_dirent_put_ftype(mp, dep, + ino_ftype); + libxfs_dir2_data_log_entry(tp, bp, dep); + dir_hash_update_ftype(hashtab, addr, + ino_ftype); + } + } + } + /* * check easy case first, regular inode, just bump * the link count and continue @@ -2189,6 +2238,62 @@ out_fix: * shortform directory v2 processing routines -- entry verification and * bad entry deletion (pruning). 
*/ +static struct xfs_dir2_sf_entry * +shortform_dir2_junk( + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, + struct xfs_dir2_sf_entry *sfep, + xfs_ino_t lino, + int *max_size, + int *index, + int *bytes_deleted, + int *ino_dirty) +{ + struct xfs_dir2_sf_entry *next_sfep; + int next_len; + int next_elen; + + if (lino == orphanage_ino) + orphanage_ino = 0; + + next_elen = xfs_dir3_sf_entsize(mp, sfp, sfep->namelen); + next_sfep = (xfs_dir2_sf_entry_t *)((__psint_t)sfep + next_elen); + + /* + * if we are just checking, simply return the pointer to the next entry + * here so that the checking loop can continue. + */ + if (no_modify) { + do_warn(_("would junk entry\n")); + return next_sfep; + } + + /* + * now move all the remaining entries down over the junked entry and + * clear the newly unused bytes at the tail of the directory region. + */ + next_len = *max_size - ((__psint_t)next_sfep - (__psint_t)sfp); + *max_size -= next_elen; + *bytes_deleted += next_elen; + + memmove(sfep, next_sfep, next_len); + memset((void *)((__psint_t)sfep + next_len), 0, next_elen); + sfp->count -= 1; + *ino_dirty = 1; + + /* + * WARNING: drop the index i by one so it matches the decremented count + * for accurate comparisons in the loop test + */ + (*index)--; + + if (verbose) + do_warn(_("junking entry\n")); + else + do_warn("\n"); + return sfep; +} + static void shortform_dir2_entry_check(xfs_mount_t *mp, xfs_ino_t ino, @@ -2201,15 +2306,13 @@ shortform_dir2_entry_check(xfs_mount_t *mp, xfs_ino_t lino; xfs_ino_t parent; struct xfs_dir2_sf_hdr *sfp; - xfs_dir2_sf_entry_t *sfep, *next_sfep, *tmp_sfep; - xfs_ifork_t *ifp; - ino_tree_node_t *irec; + struct xfs_dir2_sf_entry *sfep; + struct xfs_dir2_sf_entry *next_sfep; + struct xfs_ifork *ifp; + struct ino_tree_node *irec; int max_size; int ino_offset; int i; - int junkit; - int tmp_len; - int tmp_elen; int bad_sfnamelen; int namelen; int bytes_deleted; @@ -2266,9 +2369,7 @@ shortform_dir2_entry_check(xfs_mount_t *mp, for (i = 0; i 
< sfp->count && max_size > (__psint_t)next_sfep - (__psint_t)sfp; sfep = next_sfep, i++) { - junkit = 0; bad_sfnamelen = 0; - tmp_sfep = NULL; lino = xfs_dir3_sfe_get_ino(mp, sfp, sfep); @@ -2340,7 +2441,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, do_warn( _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"), fname, ino, lino); - goto do_junkit; + next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, + &max_size, &i, &bytes_deleted, + ino_dirty); + continue; } ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum; @@ -2354,7 +2458,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, do_warn( _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"), fname, ino, lino); - goto do_junkit; + next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, + &max_size, &i, &bytes_deleted, + ino_dirty); + continue; } /* * check if this inode is lost+found dir in the root @@ -2367,7 +2474,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, do_warn( _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"), ORPHANAGE, lino, ino); - goto do_junkit; + next_sfep = shortform_dir2_junk(mp, sfp, sfep, + lino, &max_size, &i, + &bytes_deleted, ino_dirty); + continue; } /* * if this is a dup, it will be picked up below, @@ -2381,11 +2491,15 @@ shortform_dir2_entry_check(xfs_mount_t *mp, */ if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t) (sfep - xfs_dir2_sf_firstentry(sfp)), - lino, sfep->namelen, sfep->name)) { + lino, sfep->namelen, sfep->name, + xfs_dir3_sfe_get_ftype(mp, sfp, sfep))) { do_warn( _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), fname, lino, ino); - goto do_junkit; + next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, + &max_size, &i, &bytes_deleted, + ino_dirty); + continue; } if (!inode_isadir(irec, ino_offset)) { @@ -2403,11 +2517,14 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), * the .. 
in the child, blow out the entry */ if (is_inode_reached(irec, ino_offset)) { - junkit = 1; do_warn( _("entry \"%s\" in directory inode %" PRIu64 " references already connected inode %" PRIu64 ".\n"), fname, ino, lino); + next_sfep = shortform_dir2_junk(mp, sfp, sfep, + lino, &max_size, &i, + &bytes_deleted, ino_dirty); + continue; } else if (parent == ino) { add_inode_reached(irec, ino_offset); add_inode_ref(current_irec, current_ino_offset); @@ -2423,76 +2540,60 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), add_dotdot_update(XFS_INO_TO_AGNO(mp, lino), irec, ino_offset); } else { - junkit = 1; do_warn( _("entry \"%s\" in directory inode %" PRIu64 " not consistent with .. value (%" PRIu64 ") in inode %" PRIu64 ",\n"), fname, ino, parent, lino); + next_sfep = shortform_dir2_junk(mp, sfp, sfep, + lino, &max_size, &i, + &bytes_deleted, ino_dirty); + continue; } } - if (junkit) { -do_junkit: - if (lino == orphanage_ino) - orphanage_ino = 0; - if (!no_modify) { - tmp_elen = xfs_dir3_sf_entsize(mp, sfp, - sfep->namelen); - tmp_sfep = (xfs_dir2_sf_entry_t *) - ((__psint_t) sfep + tmp_elen); - tmp_len = max_size - ((__psint_t) tmp_sfep - - (__psint_t) sfp); - max_size -= tmp_elen; - bytes_deleted += tmp_elen; - - memmove(sfep, tmp_sfep, tmp_len); - - sfp->count -= 1; - memset((void *)((__psint_t)sfep + tmp_len), 0, - tmp_elen); + /* validate ftype field if supported */ + if (xfs_sb_version_hasftype(&mp->m_sb)) { + __uint8_t dir_ftype; + __uint8_t ino_ftype; - /* - * set the tmp value to the current - * pointer so we'll process the entry - * we just moved up - */ - tmp_sfep = sfep; - - /* - * WARNING: drop the index i by one - * so it matches the decremented count for - * accurate comparisons in the loop test - */ - i--; - - *ino_dirty = 1; + dir_ftype = xfs_dir3_sfe_get_ftype(mp, sfp, sfep); + ino_ftype = get_inode_ftype(irec, ino_offset); - if (verbose) - do_warn(_("junking entry\n")); - else - do_warn("\n"); - } else { - do_warn(_("would 
junk entry\n")); + if (dir_ftype != ino_ftype) { + if (no_modify) { + do_warn( + _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), + dir_ftype, ino_ftype, + ino, lino); + } else { + do_warn( + _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), + dir_ftype, ino_ftype, + ino, lino); + xfs_dir3_sfe_put_ftype(mp, sfp, sfep, + ino_ftype); + dir_hash_update_ftype(hashtab, + (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)), + ino_ftype); + *ino_dirty = 1; + } } - } else if (lino > XFS_DIR2_MAX_SHORT_INUM) + } + + if (lino > XFS_DIR2_MAX_SHORT_INUM) i8++; /* - * go onto next entry unless we've just junked an - * entry in which the current entry pointer points - * to an unprocessed entry. have to take into entries - * with bad namelen into account in no modify mode since we - * calculate size based on next_sfep. + * go onto next entry - we have to take entries with bad namelen + * into account in no modify mode since we calculate size based + * on next_sfep. */ ASSERT(no_modify || bad_sfnamelen == 0); - - next_sfep = (tmp_sfep == NULL) - ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep - + ((!bad_sfnamelen) - ? xfs_dir3_sf_entsize(mp, sfp, sfep->namelen) - : xfs_dir3_sf_entsize(mp, sfp, namelen))) - : tmp_sfep; + next_sfep = (struct xfs_dir2_sf_entry *)((__psint_t)sfep + + (bad_sfnamelen + ? 
xfs_dir3_sf_entsize(mp, sfp, namelen) + : xfs_dir3_sf_entsize(mp, sfp, sfep->namelen))); } if (sfp->i8count != i8) { @@ -2501,6 +2602,8 @@ do_junkit: ino); } else { if (i8 == 0) { + struct xfs_dir2_sf_entry *tmp_sfep; + tmp_sfep = next_sfep; process_sf_dir2_fixi8(mp, sfp, &tmp_sfep); bytes_deleted += @@ -2518,8 +2621,7 @@ do_junkit: /* * sync up sizes if required */ - if (*ino_dirty) { - ASSERT(bytes_deleted > 0); + if (*ino_dirty && bytes_deleted > 0) { ASSERT(!no_modify); libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK); ip->i_d.di_size -= bytes_deleted; @@ -2897,8 +2999,15 @@ traverse_function( if (irec->ino_isa_dir == 0) continue; - if (pf_args) + if (pf_args) { sem_post(&pf_args->ra_count); +#ifdef XR_PF_TRACE + sem_getvalue(&pf_args->ra_count, &i); + pftrace( + "processing inode chunk %p in AG %d (sem count = %d)", + irec, agno, i); +#endif + } for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { if (inode_isadir(irec, i)) @@ -2919,9 +3028,10 @@ update_missing_dotdot_entries( * set dotdot_update flag so processing routines do not count links */ dotdot_update = 1; - while (dotdot_update_list) { - dir = dotdot_update_list; - dotdot_update_list = dir->next; + while (!list_empty(&dotdot_update_list)) { + dir = list_entry(dotdot_update_list.prev, struct dotdot_update, + list); + list_del(&dir->list); process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset); free(dir); } @@ -2929,23 +3039,9 @@ update_missing_dotdot_entries( static void traverse_ags( - xfs_mount_t *mp) + struct xfs_mount *mp) { - int i; - work_queue_t queue; - prefetch_args_t *pf_args[2]; - - /* - * we always do prefetch for phase 6 as it will fill in the gaps - * not read during phase 3 prefetch. 
- */ - queue.mp = mp; - pf_args[0] = start_inode_prefetch(0, 1, NULL); - for (i = 0; i < glob_agcount; i++) { - pf_args[(~i) & 1] = start_inode_prefetch(i + 1, 1, - pf_args[i & 1]); - traverse_function(&queue, i, pf_args[i & 1]); - } + do_inode_prefetch(mp, 0, traverse_function, false, true); } void diff --git a/repair/prefetch.c b/repair/prefetch.c index d3491da..e47a48e 100644 --- a/repair/prefetch.c +++ b/repair/prefetch.c @@ -105,11 +105,12 @@ pf_start_io_workers( static void pf_queue_io( prefetch_args_t *args, - xfs_fsblock_t fsbno, - int blen, + struct xfs_buf_map *map, + int nmaps, int flag) { - xfs_buf_t *bp; + struct xfs_buf *bp; + xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, map[0].bm_bn); /* * Never block on a buffer lock here, given that the actual repair @@ -117,8 +118,7 @@ pf_queue_io( * the lock holder is either reading it from disk himself or * completely overwriting it this behaviour is perfectly fine. */ - bp = libxfs_getbuf_flags(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), - XFS_FSB_TO_BB(mp, blen), LIBXFS_GETBUF_TRYLOCK); + bp = libxfs_getbuf_map(mp->m_dev, map, nmaps, LIBXFS_GETBUF_TRYLOCK); if (!bp) return; @@ -167,6 +167,14 @@ pf_read_bmbt_reclist( xfs_bmbt_irec_t irec; xfs_dfilblks_t cp = 0; /* prev count */ xfs_dfiloff_t op = 0; /* prev offset */ +#define MAP_ARRAY_SZ 4 + struct xfs_buf_map map_array[MAP_ARRAY_SZ]; + struct xfs_buf_map *map = map_array; + int max_extents = MAP_ARRAY_SZ; + int nmaps = 0;; + unsigned int len = 0; + int ret = 0; + for (i = 0; i < numrecs; i++) { libxfs_bmbt_disk_get_all(rp + i, &irec); @@ -174,11 +182,11 @@ pf_read_bmbt_reclist( if (((i > 0) && (op + cp > irec.br_startoff)) || (irec.br_blockcount == 0) || (irec.br_startoff >= fs_max_file_offset)) - return 0; + goto out_free; if (!verify_dfsbno(mp, irec.br_startblock) || !verify_dfsbno(mp, irec.br_startblock + irec.br_blockcount - 1)) - return 0; + goto out_free; if (!args->dirs_only && ((irec.br_startoff + irec.br_blockcount) >= mp->m_dirfreeblk)) @@ -188,18 +196,59 
@@ pf_read_bmbt_reclist( cp = irec.br_blockcount; while (irec.br_blockcount) { - unsigned int len; + unsigned int bm_len; pftrace("queuing dir extent in AG %d", args->agno); - len = (irec.br_blockcount > mp->m_dirblkfsbs) ? - mp->m_dirblkfsbs : irec.br_blockcount; - pf_queue_io(args, irec.br_startblock, len, B_DIR_META); - irec.br_blockcount -= len; - irec.br_startblock += len; + if (len + irec.br_blockcount >= mp->m_dirblkfsbs) + bm_len = mp->m_dirblkfsbs - len; + else + bm_len = irec.br_blockcount; + len += bm_len; + + map[nmaps].bm_bn = XFS_FSB_TO_DADDR(mp, + irec.br_startblock); + map[nmaps].bm_len = XFS_FSB_TO_BB(mp, bm_len); + nmaps++; + + if (len == mp->m_dirblkfsbs) { + pf_queue_io(args, map, nmaps, B_DIR_META); + len = 0; + nmaps = 0; + } + + irec.br_blockcount -= bm_len; + irec.br_startblock += bm_len; + + /* + * Handle very fragmented dir2 blocks with dynamically + * allocated buffer maps. + */ + if (nmaps >= max_extents) { + struct xfs_buf_map *old_map = NULL; + + if (map == map_array) { + old_map = map; + map = NULL; + } + max_extents *= 2; + map = realloc(map, max_extents * sizeof(*map)); + if (map == NULL) { + do_error( + _("couldn't malloc dir2 buffer list\n")); + exit(1); + } + if (old_map) + memcpy(map, old_map, sizeof(map_array)); + } + } } - return 1; + ret = 1; +out_free: + if (map != map_array) + free(map); + return ret; } /* @@ -249,7 +298,8 @@ pf_scanfunc_bmap( /* * do some validation on the block contents */ - if ((be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC) || + if ((block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) && + block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC)) || (be16_to_cpu(block->bb_level) != level)) return 0; @@ -395,9 +445,28 @@ pf_read_inode_dirs( } /* - * pf_batch_read must be called with the lock locked. + * Discontiguous buffers require multiple IOs to fill, so we can't use any + * linearising, hole filling algorithms on them to avoid seeks. 
Just remove them + * for the prefetch queue and read them straight into the cache and release + * them. */ +static void +pf_read_discontig( + struct prefetch_args *args, + struct xfs_buf *bp) +{ + if (!btree_delete(args->io_queue, XFS_DADDR_TO_FSB(mp, bp->b_bn))) + do_error(_("prefetch corruption\n")); + + pthread_mutex_unlock(&args->lock); + libxfs_readbufr_map(mp->m_ddev_targp, bp, 0); + libxfs_putbuf(bp); + pthread_mutex_lock(&args->lock); +} +/* + * pf_batch_read must be called with the lock locked. + */ static void pf_batch_read( prefetch_args_t *args, @@ -426,8 +495,15 @@ pf_batch_read( max_fsbno = fsbno + pf_max_fsbs; } while (bplist[num] && num < MAX_BUFS && fsbno < max_fsbno) { - if (which != PF_META_ONLY || - !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num]))) + /* + * Handle discontiguous buffers outside the seek + * optimised IO loop below. + */ + if ((bplist[num]->b_flags & LIBXFS_B_DISCONTIG)) { + pf_read_discontig(args, bplist[num]); + bplist[num] = NULL; + } else if (which != PF_META_ONLY || + !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num]))) num++; if (num == MAX_BUFS) break; @@ -648,7 +724,7 @@ pf_queuing_worker( irec, args->agno, i); #endif err = sem_trywait(&args->ra_count); - if (err == EAGAIN) { + if (err < 0 && errno == EAGAIN) { /* * Kick the queue once we have reached the limit; * without this the threads processing the inodes @@ -664,10 +740,13 @@ pf_queuing_worker( bno = XFS_AGINO_TO_AGBNO(mp, cur_irec->ino_startnum); do { - pf_queue_io(args, XFS_AGB_TO_FSB(mp, args->agno, bno), - blks_per_cluster, - (cur_irec->ino_isa_dir != 0) ? - B_DIR_INODE : B_INODE); + struct xfs_buf_map map; + + map.bm_bn = XFS_AGB_TO_DADDR(mp, args->agno, bno); + map.bm_len = XFS_FSB_TO_BB(mp, blks_per_cluster); + pf_queue_io(args, &map, 1, + (cur_irec->ino_isa_dir != 0) ? 
B_DIR_INODE + : B_INODE); bno += blks_per_cluster; num_inos += inodes_per_cluster; } while (num_inos < XFS_IALLOC_INODES(mp)); @@ -787,6 +866,140 @@ start_inode_prefetch( return args; } +/* + * prefetch_ag_range runs a prefetch-and-process loop across a range of AGs. It + * begins with @start+ag, and finishes with @end_ag - 1 (i.e. does not prefetch + * or process @end_ag). The function starts prefetch on the first AG, then loops + * starting prefetch on the next AG and then blocks processing the current AG as + * the prefetch queue brings inodes into the processing queue. + * + * There is only one prefetch taking place at a time, so the prefetch on the + * next AG only starts once the current AG has been completely prefetched. Hence + * the prefetch of the next AG will start some time before the processing of the + * current AG finishes, ensuring that when we iterate an start processing the + * next AG there is already a significant queue of inodes to process. + * + * Prefetch is done this way to prevent it from running too far ahead of the + * processing. Allowing it to do so can cause cache thrashing, where new + * prefetch causes previously prefetched buffers to be reclaimed before the + * processing thread uses them. This results in reading all the inodes and + * metadata twice per phase and it greatly slows down the processing. Hence we + * have to carefully control how far ahead we prefetch... 
+ */ +static void +prefetch_ag_range( + struct work_queue *work, + xfs_agnumber_t start_ag, + xfs_agnumber_t end_ag, + bool dirs_only, + void (*func)(struct work_queue *, + xfs_agnumber_t, void *)) +{ + int i; + struct prefetch_args *pf_args[2]; + + pf_args[start_ag & 1] = start_inode_prefetch(start_ag, dirs_only, NULL); + for (i = start_ag; i < end_ag; i++) { + /* Don't prefetch end_ag */ + if (i + 1 < end_ag) + pf_args[(~i) & 1] = start_inode_prefetch(i + 1, + dirs_only, pf_args[i & 1]); + func(work, i, pf_args[i & 1]); + } +} + +struct pf_work_args { + xfs_agnumber_t start_ag; + xfs_agnumber_t end_ag; + bool dirs_only; + void (*func)(struct work_queue *, xfs_agnumber_t, void *); +}; + +static void +prefetch_ag_range_work( + struct work_queue *work, + xfs_agnumber_t unused, + void *args) +{ + struct pf_work_args *wargs = args; + + prefetch_ag_range(work, wargs->start_ag, wargs->end_ag, + wargs->dirs_only, wargs->func); + free(args); +} + +/* + * Do inode prefetch in the most optimal way for the context under which repair + * has been run. + */ +void +do_inode_prefetch( + struct xfs_mount *mp, + int stride, + void (*func)(struct work_queue *, + xfs_agnumber_t, void *), + bool check_cache, + bool dirs_only) +{ + int i; + struct work_queue queue; + struct work_queue *queues; + + /* + * If the previous phases of repair have not overflowed the buffer + * cache, then we don't need to re-read any of the metadata in the + * filesystem - it's all in the cache. In that case, run a thread per + * CPU to maximise parallelism of the queue to be processed. + */ + if (check_cache && !libxfs_bcache_overflowed()) { + queue.mp = mp; + create_work_queue(&queue, mp, libxfs_nproc()); + for (i = 0; i < mp->m_sb.sb_agcount; i++) + queue_work(&queue, func, i, NULL); + destroy_work_queue(&queue); + return; + } + + /* + * single threaded behaviour - single prefetch thread, processed + * directly after each AG is queued. 
+ */ + if (!stride) { + queue.mp = mp; + prefetch_ag_range(&queue, 0, mp->m_sb.sb_agcount, + dirs_only, func); + return; + } + + /* + * create one worker thread for each segment of the volume + */ + queues = malloc(thread_count * sizeof(work_queue_t)); + for (i = 0; i < thread_count; i++) { + struct pf_work_args *wargs; + + wargs = malloc(sizeof(struct pf_work_args)); + wargs->start_ag = i * stride; + wargs->end_ag = min((i + 1) * stride, + mp->m_sb.sb_agcount); + wargs->dirs_only = dirs_only; + wargs->func = func; + + create_work_queue(&queues[i], mp, 1); + queue_work(&queues[i], prefetch_ag_range_work, 0, wargs); + + if ((i + 1) * stride >= mp->m_sb.sb_agcount) /* wargs is owned by the worker now and may already be freed */ + break; + } + + /* + * wait for all workers, including queues[i] created just before the break + */ + for (i = min(i, thread_count - 1); i >= 0; i--) + destroy_work_queue(&queues[i]); + free(queues); +} + void wait_for_inode_prefetch( prefetch_args_t *args) diff --git a/repair/prefetch.h b/repair/prefetch.h index 44a406c..b837752 100644 --- a/repair/prefetch.h +++ b/repair/prefetch.h @@ -4,6 +4,7 @@ #include #include "incore.h" +struct work_queue; extern int do_prefetch; @@ -41,6 +42,15 @@ start_inode_prefetch( prefetch_args_t *prev_args); void +do_inode_prefetch( + struct xfs_mount *mp, + int stride, + void (*func)(struct work_queue *, + xfs_agnumber_t, void *), + bool check_cache, + bool dirs_only); + +void wait_for_inode_prefetch( prefetch_args_t *args); diff --git a/repair/protos.h b/repair/protos.h index 601f2a9..ff42fa7 100644 --- a/repair/protos.h +++ b/repair/protos.h @@ -18,7 +18,8 @@ void xfs_init(libxfs_init_t *args); -int verify_sb(xfs_sb_t *sb, +int verify_sb(char *sb_buf, + xfs_sb_t *sb, int is_primary_sb); int verify_set_primary_sb(xfs_sb_t *root_sb, int sb_index, diff --git a/repair/sb.c b/repair/sb.c index c54d89b..b111aca 100644 --- a/repair/sb.c +++ b/repair/sb.c @@ -139,7 +139,7 @@ find_secondary_sb(xfs_sb_t *rsb) c_bufsb = (char *)sb + i; libxfs_sb_from_disk(&bufsb, (xfs_dsb_t *)c_bufsb); - if (verify_sb(&bufsb, 0) != XR_OK) + if (verify_sb(c_bufsb, &bufsb, 0) != 
XR_OK) continue; do_warn(_("found candidate secondary superblock...\n")); @@ -245,7 +245,7 @@ sb_validate_ino_align(struct xfs_sb *sb) */ int -verify_sb(xfs_sb_t *sb, int is_primary_sb) +verify_sb(char *sb_buf, xfs_sb_t *sb, int is_primary_sb) { __uint32_t bsize; int i; @@ -263,8 +263,34 @@ verify_sb(xfs_sb_t *sb, int is_primary_sb) if (is_primary_sb && sb->sb_inprogress == 1) return(XR_BAD_INPROGRESS); - /* check to make sure blocksize is legal 2^N, 9 <= N <= 16 */ + /* + * before going *any further*, validate the sector size and if the + * version says we should have CRCs enabled, validate that. + */ + + /* check to make sure sectorsize is legal 2^N, 9 <= N <= 15 */ + if (sb->sb_sectsize == 0) + return(XR_BAD_SECT_SIZE_DATA); + + bsize = 1; + for (i = 0; bsize < sb->sb_sectsize && + i < sizeof(sb->sb_sectsize) * NBBY; i++) { + bsize <<= 1; + } + + if (i < XFS_MIN_SECTORSIZE_LOG || i > XFS_MAX_SECTORSIZE_LOG) + return(XR_BAD_SECT_SIZE_DATA); + + /* check sb sectorsize field against sb sectlog field */ + if (i != sb->sb_sectlog) + return(XR_BAD_SECT_SIZE_DATA); + + /* sector size in range - CRC check time */ + if (xfs_sb_version_hascrc(sb) && + !xfs_verify_cksum(sb_buf, sb->sb_sectsize, XFS_SB_CRC_OFF)) + return XR_BAD_CRC; + /* check to make sure blocksize is legal 2^N, 9 <= N <= 16 */ if (sb->sb_blocksize == 0) return(XR_BAD_BLOCKSIZE); @@ -300,26 +326,6 @@ verify_sb(xfs_sb_t *sb, int is_primary_sb) sb->sb_inopblock != howmany(sb->sb_blocksize,sb->sb_inodesize)) return(XR_BAD_INO_SIZE_DATA); - /* check to make sure sectorsize is legal 2^N, 9 <= N <= 15 */ - - if (sb->sb_sectsize == 0) - return(XR_BAD_SECT_SIZE_DATA); - - bsize = 1; - - for (i = 0; bsize < sb->sb_sectsize && - i < sizeof(sb->sb_sectsize) * NBBY; i++) { - bsize <<= 1; - } - - if (i < XFS_MIN_SECTORSIZE_LOG || i > XFS_MAX_SECTORSIZE_LOG) - return(XR_BAD_SECT_SIZE_DATA); - - /* check sb sectorsize field against sb sectlog field */ - - if (i != sb->sb_sectlog) - return(XR_BAD_SECT_SIZE_DATA); - if 
(xfs_sb_version_hassector(sb)) { /* check to make sure log sector is legal 2^N, 9 <= N <= 15 */ @@ -482,9 +488,11 @@ write_primary_sb(xfs_sb_t *sbp, int size) do_error(_("couldn't seek to offset 0 in filesystem\n")); } - libxfs_sb_to_disk(buf, sbp, XFS_SB_ALL_BITS); + if (xfs_sb_version_hascrc(sbp)) + xfs_update_cksum((char *)buf, size, XFS_SB_CRC_OFF); + if (write(x.dfd, buf, size) != size) { free(buf); do_error(_("primary superblock write failed!\n")); @@ -494,7 +502,7 @@ write_primary_sb(xfs_sb_t *sbp, int size) } /* - * get a possible superblock -- don't check for internal consistency + * get a possible superblock -- checks for internal consistency */ int get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno) @@ -529,9 +537,10 @@ get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno) do_error("%s\n", strerror(error)); } libxfs_sb_from_disk(sbp, buf); - free(buf); - return (verify_sb(sbp, 0)); + rval = verify_sb((char *)buf, sbp, agno == 0); + free(buf); + return rval; } /* returns element on list with highest reference count */ @@ -745,13 +754,11 @@ verify_set_primary_sb(xfs_sb_t *rsb, off = (xfs_off_t)agno * rsb->sb_agblocks << rsb->sb_blocklog; checked[agno] = 1; - - if (get_sb(sb, off, size, agno) == XR_EOF) { - retval = XR_EOF; + retval = get_sb(sb, off, size, agno); + if (retval == XR_EOF) goto out_free_list; - } - if (verify_sb(sb, 0) == XR_OK) { + if (retval == XR_OK) { /* * save away geometry info. 
* don't bother checking the sb diff --git a/repair/scan.c b/repair/scan.c index 49ed194..1744c32 100644 --- a/repair/scan.c +++ b/repair/scan.c @@ -171,17 +171,12 @@ scan_bmapbt( xfs_bmbt_rec_t *rp; xfs_dfiloff_t first_key; xfs_dfiloff_t last_key; - char *forkname; + char *forkname = get_forkname(whichfork); int numrecs; xfs_agnumber_t agno; xfs_agblock_t agbno; int state; - if (whichfork == XFS_DATA_FORK) - forkname = _("data"); - else - forkname = _("attr"); - /* * unlike the ag freeblock btrees, if anything looks wrong * in an inode bmap tree, just bail. it's possible that @@ -273,7 +268,7 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n" agno = XFS_FSB_TO_AGNO(mp, bno); agbno = XFS_FSB_TO_AGBNO(mp, bno); - pthread_mutex_lock(&ag_locks[agno]); + pthread_mutex_lock(&ag_locks[agno].lock); state = get_bmap(agno, agbno); switch (state) { case XR_E_UNKNOWN: @@ -319,7 +314,7 @@ _("bad state %d, inode %" PRIu64 " bmap block 0x%" PRIx64 "\n"), state, ino, bno); break; } - pthread_mutex_unlock(&ag_locks[agno]); + pthread_mutex_unlock(&ag_locks[agno].lock); } else { /* * attribute fork for realtime files is in the regular @@ -866,9 +861,9 @@ _("inode rec for ino %" PRIu64 " (%d/%d) overlaps existing rec (start %d/%d)\n") for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { if (XFS_INOBT_IS_FREE_DISK(rp, j)) { nfree++; - add_aginode_uncertain(agno, ino + j, 1); + add_aginode_uncertain(mp, agno, ino + j, 1); } else { - add_aginode_uncertain(agno, ino + j, 0); + add_aginode_uncertain(mp, agno, ino + j, 0); } } } @@ -1229,7 +1224,6 @@ scan_ag( do_error(_("can't get root superblock for ag %d\n"), agno); return; } - sb = (xfs_sb_t *)calloc(BBSIZE, 1); if (!sb) { do_error(_("can't allocate memory for superblock\n")); diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c index 7beffcb..08b25f0 100644 --- a/repair/xfs_repair.c +++ b/repair/xfs_repair.c @@ -29,6 +29,7 @@ #include "prefetch.h" #include "threads.h" #include "progress.h" +#include "dinode.h" #define 
rounddown(x, y) (((x)/(y))*(y)) @@ -136,6 +137,8 @@ err_string(int err_code) _("bad stripe width in superblock"); err_message[XR_BAD_SVN] = _("bad shared version number in superblock"); + err_message[XR_BAD_CRC] = + _("bad CRC in superblock"); done = 1; } @@ -528,11 +531,14 @@ main(int argc, char **argv) xfs_buf_t *sbp; xfs_mount_t xfs_m; char *msgbuf; + struct xfs_sb psb; + int rval; progname = basename(argv[0]); setlocale(LC_ALL, ""); bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); + dinode_bmbt_translation_init(); temp_mp = &xfs_m; setbuf(stdout, NULL); @@ -556,13 +562,12 @@ main(int argc, char **argv) exit(1); } - /* prepare the mount structure */ - memset(&xfs_m, 0, sizeof(xfs_mount_t)); - libxfs_buftarg_init(&xfs_m, x.ddev, x.logdev, x.rtdev); - sbp = libxfs_readbuf(xfs_m.m_ddev_targp, XFS_SB_DADDR, - 1 << (XFS_MAX_SECTORSIZE_LOG - BBSHIFT), 0, - &xfs_sb_buf_ops); - libxfs_sb_from_disk(&xfs_m.m_sb, XFS_BUF_TO_SBP(sbp)); + rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0); + if (rval != XR_OK) { + do_warn(_("Primary superblock bad after phase 1!\n" + "Exiting now.\n")); + exit(1); + } /* * if the sector size of the filesystem we are trying to repair is @@ -581,7 +586,7 @@ main(int argc, char **argv) geom.sectsize = BBSIZE; } - if (xfs_m.m_sb.sb_sectsize < geom.sectsize) { + if (psb.sb_sectsize < geom.sectsize) { long old_flags; old_flags = fcntl(fd, F_GETFL, 0); @@ -593,7 +598,10 @@ main(int argc, char **argv) } } } - mp = libxfs_mount(&xfs_m, &xfs_m.m_sb, x.ddev, x.logdev, x.rtdev, 0); + + /* prepare the mount structure */ + memset(&xfs_m, 0, sizeof(xfs_mount_t)); + mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0); if (!mp) { fprintf(stderr, @@ -601,8 +609,6 @@ main(int argc, char **argv) progname); exit(1); } - libxfs_putbuf(sbp); - libxfs_purgebuf(sbp); /* * set XFS-independent status vars from the mount/sb structure @@ -627,13 +633,32 @@ main(int argc, char **argv) * to target these for an increase in thread count. 
Hence a stride value * of 15 is chosen to ensure we get at least 2 AGs being scanned at once * on such filesystems. + * + * Limit the maximum thread count based on the CPU power that + * is available. If we use too many threads, we might run out of memory + * and CPU power before we run out of IO concurrency. We limit to 8 + * threads/CPU as this is enough threads to saturate a CPU on fast + * devices, yet few enough that it will saturate but won't overload slow + * devices. */ if (!ag_stride && glob_agcount >= 16 && do_prefetch) ag_stride = 15; if (ag_stride) { + int max_threads = platform_nproc() * 8; + thread_count = (glob_agcount + ag_stride - 1) / ag_stride; - thread_init(); + while (thread_count > max_threads) { + ag_stride *= 2; + thread_count = (glob_agcount + ag_stride - 1) / + ag_stride; + } + if (thread_count > 0) + thread_init(); + else { + thread_count = 1; + ag_stride = 0; + } } if (ag_stride && report_interval) { @@ -895,6 +920,11 @@ _("Note - stripe unit (%d) and width (%d) fields have been reset.\n" if (verbose) summary_report(); do_log(_("done\n")); + + if (dangerously && !no_modify) + do_warn( +_("Repair of readonly mount complete. Immediate reboot encouraged.\n")); + pftrace_done(); return (0);