From 0690ef86d809f3f177ac6e207cf57a92cb48f19a Mon Sep 17 00:00:00 2001 From: CentOS Buildsys Date: Apr 08 2014 14:06:08 +0000 Subject: import xfsprogs-3.2.0-0.10.alpha2.el7.src.rpm --- diff --git a/.xfsprogs.metadata b/.xfsprogs.metadata index 6be3a27..ccb64e7 100644 --- a/.xfsprogs.metadata +++ b/.xfsprogs.metadata @@ -1 +1 @@ -4c45dbd1f05a3d0ed32e912297f4c6a93c8e4cff SOURCES/xfsprogs-3.2.0-alpha1.tar.gz +7647534fa3b2b2c61dc625d58bfc5671d6895ff4 SOURCES/xfsprogs-3.2.0-alpha2.tar.gz diff --git a/SOURCES/xfsprogs-3.2.0-bigendian.patch b/SOURCES/xfsprogs-3.2.0-bigendian.patch deleted file mode 100644 index 20f1bd8..0000000 --- a/SOURCES/xfsprogs-3.2.0-bigendian.patch +++ /dev/null @@ -1,46 +0,0 @@ -[PATCH] xfsprogs: fix crc32 build on big endian - -While kernelspace can test #ifdef __LITTLE_ENDIAN, this -doesn't work in userspace. __LITTLE_ENDIAN is defined - -as is __BIG_ENDIAN. - -So we build on all boxes as __LITTLE_ENDIAN, and the -self-test (thankfully!) fails on big endian boxes. - -Fix this by testing __BYTE_ORDER values. - -And add an else which should never be hit, but just in case... 
- -Signed-off-by: Eric Sandeen -Reviewed-by: Dave Chinner ---- - -diff --git a/libxfs/crc32.c b/libxfs/crc32.c -index 1c0d958..0f847d2 100644 ---- a/libxfs/crc32.c -+++ b/libxfs/crc32.c -@@ -63,18 +63,20 @@ typedef __u32 u64; - static inline u32 - crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) - { --# ifdef __LITTLE_ENDIAN -+#if __BYTE_ORDER == __LITTLE_ENDIAN - # define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8) - # define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \ - t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255]) - # define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \ - t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255]) --# else -+# elif __BYTE_ORDER == __BIG_ENDIAN - # define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) - # define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \ - t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255]) - # define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \ - t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255]) -+# else -+# error What endian are you? - # endif - const u32 *b; - size_t rem_len; - - diff --git a/SOURCES/xfsprogs-3.2.0-fix-l_sectBBsize.patch b/SOURCES/xfsprogs-3.2.0-fix-l_sectBBsize.patch deleted file mode 100644 index 32c4bd0..0000000 --- a/SOURCES/xfsprogs-3.2.0-fix-l_sectBBsize.patch +++ /dev/null @@ -1,58 +0,0 @@ -[PATCH] xfsprogs: remove incorrect l_sectBBsize assignment in xfs_repair - -Commit e0607266 xfsprogs: add crc format support to repair - -added a 2nd assignment to l_sectBBsize: - - log.l_sectBBsize = 1 << mp->m_sb.sb_logsectlog; - -which is incorrect; sb_logsectlog is log2 of the sector size, -in bytes; l_sectBBsize is the size of the log sector in -512-byte units. - -So for a 4k sector size log, we were assigning 4096 rather -than 8. 
This broke xlog_find_tail, and caused xfs_repair -to think that a log was dirty even when it was clean: - -"ERROR: The filesystem has valuable metadata changes in a log" - -(xfs_logprint didn't have this error, so xfs_logprint -t -agreed that the filesystem really was clean). - -Just remove the incorrect assignment; it was already properly -assigned about 12 lines prior: - - log.l_sectBBsize = BTOBB(x.lbsize); - -and things work again. - -(This worked accidentally for 512-sector devices, because -we special-case those and set sb_logsectlog to "0" rather -than 9, so l_sectBBsize came out to "1" (as in 1 sector), -as it should have). - -Reported-by: Markus Trippelsdorf -Signed-off-by: Eric Sandeen -Reviewed-by: Carlos Maiolino -Reviewed-by: Dave Chinner ---- - -diff --git a/repair/phase2.c b/repair/phase2.c -index a62854e..2817fed 100644 ---- a/repair/phase2.c -+++ b/repair/phase2.c -@@ -64,7 +64,6 @@ zero_log(xfs_mount_t *mp) - ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); - } - log.l_sectbb_mask = (1 << log.l_sectbb_log) - 1; -- log.l_sectBBsize = 1 << mp->m_sb.sb_logsectlog; - - if ((error = xlog_find_tail(&log, &head_blk, &tail_blk))) { - do_warn(_("zero_log: cannot find log head/tail " - -_______________________________________________ -xfs mailing list -xfs@oss.sgi.com -http://oss.sgi.com/mailman/listinfo/xfs - - diff --git a/SOURCES/xfsprogs-3.2.0-init-mounts-symlinks.patch b/SOURCES/xfsprogs-3.2.0-init-mounts-symlinks.patch deleted file mode 100644 index 9e0a002..0000000 --- a/SOURCES/xfsprogs-3.2.0-init-mounts-symlinks.patch +++ /dev/null @@ -1,161 +0,0 @@ -[PATCH] xfsprogs: handle symlinks etc in fs_table_initialise_mounts() - -Commit: - -6a23747d xfs_quota: support relative path as `path' arguments - -used realpath() on the supplied pathname to handle things like -relative pathnames and pathnames ending in "/" which otherwise -caused the getmntent scanning to fail. 
- -However, this regressed cases where a path in mtab was a symlink; -realpath() resolves this to the target, and so no match is found. - -This causes i.e.: - -# xfs_quota -x -c report /dev/mapper/testvg-testlv - -to fail with: - -xfs_quota: cannot setup path for mount /dev/mapper/testvg-testlv: No such device or address - -because the scanning looks for /dev/dm-3, but the long symlink -name is what exists in mtab, and no match is found. - -Fix this, but keep the intended enhancements, by testing *both* the -user-specified path (which might be relative, or contain a trailing -slash on a mountpoint) and the realpath-resolved path (which turns -a relative mountpoint into a full path, and removes trailing slashes), -to determine whether the user-specified path is an xfs mountpoint or -device. - -While we're at it, add a few comments, and go back to the testing -of "path" not "rpath"; whether or not path is passed to the function -is what determines control flow. If path is specified, and realpath -succeeds, we're guaranteed to have rpath as well, so there is no need -to retest that. rpath is initialized to NULL, so an unconditional -free(rpath) is safe as well. 
- -Signed-off-by: Eric Sandeen ---- - - -diff --git a/libxcmd/paths.c b/libxcmd/paths.c -index bd84cde..7b0e434 100644 ---- a/libxcmd/paths.c -+++ b/libxcmd/paths.c -@@ -266,6 +266,10 @@ out_nomem: - return ENOMEM; - } - -+/* -+ * If *path is NULL, initialize the fs table with all xfs mount points in mtab -+ * If *path is specified, search for that path in mtab -+ */ - static int - fs_table_initialise_mounts( - char *path) -@@ -288,6 +292,7 @@ fs_table_initialise_mounts( - if ((mtp = setmntent(mtab_file, "r")) == NULL) - return ENOENT; - -+ /* Use realpath to resolve symlinks, relative paths, etc */ - if (path) - if ((rpath = realpath(path, NULL)) == NULL) - return ENOENT; -@@ -295,31 +300,37 @@ fs_table_initialise_mounts( - while ((mnt = getmntent(mtp)) != NULL) { - if (strcmp(mnt->mnt_type, "xfs") != 0) - continue; -- if (rpath && -- ((strcmp(rpath, mnt->mnt_dir) != 0) && -+ if (path && -+ ((strcmp(path, mnt->mnt_dir) != 0) && -+ (strcmp(path, mnt->mnt_fsname) != 0) && -+ (strcmp(rpath, mnt->mnt_dir) != 0) && - (strcmp(rpath, mnt->mnt_fsname) != 0))) - continue; - if (fs_extract_mount_options(mnt, &fslog, &fsrt)) - continue; - (void) fs_table_insert(mnt->mnt_dir, 0, FS_MOUNT_POINT, - mnt->mnt_fsname, fslog, fsrt); -- if (rpath) { -+ if (path) { - found = 1; - break; - } - } - endmntent(mtp); -- if (rpath) { -- free(rpath); -- if (!found) -- error = ENXIO; -- } -+ free(rpath); -+ -+ if (path && !found) -+ error = ENXIO; -+ - return error; - } - - #elif defined(HAVE_GETMNTINFO) - #include - -+/* -+ * If *path is NULL, initialize the fs table with all xfs mount points in mtab -+ * If *path is specified, search for that path in mtab -+ */ - static int - fs_table_initialise_mounts( - char *path) -@@ -335,6 +346,7 @@ fs_table_initialise_mounts( - return 0; - } - -+ /* Use realpath to resolve symlinks, relative paths, etc */ - if (path) - if ((rpath = realpath(path, NULL)) == NULL) - return ENOENT; -@@ -342,24 +354,24 @@ fs_table_initialise_mounts( - for (i = 0; i < 
count; i++) { - if (strcmp(stats[i].f_fstypename, "xfs") != 0) - continue; -- if (rpath && -- ((strcmp(rpath, stats[i].f_mntonname) != 0) && -+ if (path && -+ ((strcmp(path, stats[i].f_mntonname) != 0) && -+ (strcmp(path, stats[i].f_mntfromname) != 0) && -+ (strcmp(rpath, stats[i].f_mntonname) != 0) && - (strcmp(rpath, stats[i].f_mntfromname) != 0))) - continue; - /* TODO: external log and realtime device? */ - (void) fs_table_insert(stats[i].f_mntonname, 0, - FS_MOUNT_POINT, stats[i].f_mntfromname, - NULL, NULL); -- if (rpath) { -+ if (path) { - found = 1; - break; - } - } -- if (rpath) { -- free(rpath); -- if (!found) -- error = ENXIO; -- } -+ free(rpath); -+ if (path && !found) -+ error = ENXIO; - - return error; - } - - - -_______________________________________________ -xfs mailing list -xfs@oss.sgi.com -http://oss.sgi.com/mailman/listinfo/xfs - - diff --git a/SOURCES/xfsprogs-3.2.0-prefetch-fix.patch b/SOURCES/xfsprogs-3.2.0-prefetch-fix.patch new file mode 100644 index 0000000..a3f8e56 --- /dev/null +++ b/SOURCES/xfsprogs-3.2.0-prefetch-fix.patch @@ -0,0 +1,71 @@ +[PATCH] xfs_repair: fix prefetch queue waiting + +This fixes a regression caused by: + +97b1fcf xfs_repair: fix array overrun in do_inode_prefetch + +The thread creation loop has 2 ways to exit; either via +the loop counter based on thread_count, or the break statement +if we've started enough workers to cover all AGs. + +Whether or not the loop counter "i" reflects the number of +threads started depends on whether or not we exited via the +break. + +The above commit prevented us from indexing off the end +of the queues[] array if we actually advanced "i" all the +way to thread_count, but in the case where we break, "i" +is one *less* than the nr of threads started, so we don't +wait for completion of all threads, and all hell breaks +loose in phase 5. 
+ +Just stop with the cleverness of re-using the loop counter - +instead, explicitly count threads that we start, and then use +that counter to wait for each worker to complete. + +Signed-off-by: Eric Sandeen +Reviewed-by: Brian Foster +--- + +I have one fs which demonstrates the problem, and have verified +the regression & tested the fix against that. + +I'll run this over xfstests overnight, but it seems obvious +from here (OTOH the other fix seemed obvious too) + +diff --git a/repair/prefetch.c b/repair/prefetch.c +index e47a48e..4c32395 100644 +--- a/repair/prefetch.c ++++ b/repair/prefetch.c +@@ -944,6 +944,7 @@ do_inode_prefetch( + int i; + struct work_queue queue; + struct work_queue *queues; ++ int queues_started = 0; + + /* + * If the previous phases of repair have not overflowed the buffer +@@ -987,6 +988,7 @@ do_inode_prefetch( + + create_work_queue(&queues[i], mp, 1); + queue_work(&queues[i], prefetch_ag_range_work, 0, wargs); ++ queues_started++; + + if (wargs->end_ag >= mp->m_sb.sb_agcount) + break; +@@ -995,7 +997,7 @@ do_inode_prefetch( + /* + * wait for workers to complete + */ +- while (i--) ++ for (i = 0; i < queues_started; i++) + destroy_work_queue(&queues[i]); + free(queues); + } + +_______________________________________________ +xfs mailing list +xfs@oss.sgi.com +http://oss.sgi.com/mailman/listinfo/xfs + + diff --git a/SOURCES/xfsprogs-3.2.0-repair-zero-sb.patch b/SOURCES/xfsprogs-3.2.0-repair-zero-sb.patch new file mode 100644 index 0000000..7aa8693 --- /dev/null +++ b/SOURCES/xfsprogs-3.2.0-repair-zero-sb.patch @@ -0,0 +1,30 @@ +[PATCH] repair: ensure that unused superblock fields are zeroed + +From: Dave Chinner + +When we grab a superblock off disk via get_sb(), we don't know what +the in-memory superblock we are filling out contained. We need to +ensure that the entire structure is returned in an initialised +state regardless of which fields libxfs_sb_from_disk() populates +from disk. 
In this case, it doesn't populate the sb_crc field, +and so uninitialised values can escape through to disk on v4 +filesystems because of this. This causes xfs/031 to fail on v4 +filesystems. + +Reported-by: Eric Sandeen +Signed-off-by: Dave Chinner +--- + repair/sb.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/repair/sb.c b/repair/sb.c +index b111aca..d928dc0 100644 +--- a/repair/sb.c ++++ b/repair/sb.c +@@ -518,6 +518,7 @@ get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno) + exit(1); + } + memset(buf, 0, size); ++ memset(sbp, 0, sizeof(*sbp)); + + /* try and read it first */ diff --git a/SOURCES/xfsprogs-3.2.0-test_fd_regular_files.patch b/SOURCES/xfsprogs-3.2.0-test_fd_regular_files.patch deleted file mode 100644 index 45c5dfc..0000000 --- a/SOURCES/xfsprogs-3.2.0-test_fd_regular_files.patch +++ /dev/null @@ -1,77 +0,0 @@ - - -If a special file (block, char, pipe etc) resides on an -xfs filesystem, platform_test_xfs_[fd|path] will return -true, but a subsequent xfsctl will fail, because the file -operations to support the xfs ioctls are not set up on such -files (see i_fop assignments in xfs_setup_inode()). - -From the xfsctl manpage it's pretty clear that these functions -are supposed to return true iff a subsequent xfsctl can be -handled, so it makes sense to exclude special files. - -This was showing up in xfstest generic/306, which creates -the dev/null block device on an xfstest an tries to pwrite -to it with xfs_io - which emitted a warning when the xfsctl -trying to get geometry failed. 
- -Signed-off-by: Eric Sandeen -Reviewed-by: Christoph Hellwig ---- - -diff --git a/include/linux.h b/include/linux.h -index 5bb91cd..502fd1f 100644 ---- a/include/linux.h -+++ b/include/linux.h -@@ -34,20 +34,38 @@ static __inline__ int xfsctl(const char *path, int fd, int cmd, void *p) - return ioctl(fd, cmd, p); - } - -+/* -+ * platform_test_xfs_*() implies that xfsctl will succeed on the file; -+ * on Linux, at least, special files don't get xfs file ops, -+ * so return 0 for those -+ */ -+ - static __inline__ int platform_test_xfs_fd(int fd) - { -- struct statfs buf; -- if (fstatfs(fd, &buf) < 0) -+ struct statfs statfsbuf; -+ struct stat statbuf; -+ -+ if (fstatfs(fd, &statfsbuf) < 0) -+ return 0; -+ if (fstat(fd, &statbuf) < 0) - return 0; -- return (buf.f_type == 0x58465342); /* XFSB */ -+ if (!S_ISREG(statbuf.st_mode) && !S_ISDIR(statbuf.st_mode)) -+ return 0; -+ return (statfsbuf.f_type == 0x58465342); /* XFSB */ - } - - static __inline__ int platform_test_xfs_path(const char *path) - { -- struct statfs buf; -- if (statfs(path, &buf) < 0) -+ struct statfs statfsbuf; -+ struct stat statbuf; -+ -+ if (statfs(path, &statfsbuf) < 0) -+ return 0; -+ if (stat(path, &statbuf) < 0) -+ return 0; -+ if (!S_ISREG(statbuf.st_mode) && !S_ISDIR(statbuf.st_mode)) - return 0; -- return (buf.f_type == 0x58465342); /* XFSB */ -+ return (statfsbuf.f_type == 0x58465342); /* XFSB */ - } - - static __inline__ int platform_fstatfs(int fd, struct statfs *buf) - -_______________________________________________ -xfs mailing list -xfs@oss.sgi.com -http://oss.sgi.com/mailman/listinfo/xfs - - diff --git a/SOURCES/xfsprogs-3.2.0-xfs_db-quiet.patch b/SOURCES/xfsprogs-3.2.0-xfs_db-quiet.patch new file mode 100644 index 0000000..769c6f8 --- /dev/null +++ b/SOURCES/xfsprogs-3.2.0-xfs_db-quiet.patch @@ -0,0 +1,29 @@ +[PATCH] xfs_db: hide debug bbmap output + +Most of xfsprogs building with DEBUG enables extra +checks, asserts, etc, but this bunch of printfs was +extra output that's not 
generally helpful for most +people's runtime experience - and it breaks xfs/290 +with all the noise. + +I assume it's for actual debugging use, and not +generally useful, so bury it a bit deeper under +its own #ifdef. + +Signed-off-by: Eric Sandeen +--- + +diff --git a/db/io.c b/db/io.c +index 9a787c8..89e6030 100644 +--- a/db/io.c ++++ b/db/io.c +@@ -500,7 +500,7 @@ set_cur( + push_cur(); + + if (bbmap) { +-#ifdef DEBUG ++#ifdef DEBUG_BBMAP + int i; + printf(_("xfs_db got a bbmap for %lld\n"), (long long)d); + printf(_("\tblock map")); + diff --git a/SOURCES/xfsprogs-diff-since-alpha2.patch b/SOURCES/xfsprogs-diff-since-alpha2.patch new file mode 100644 index 0000000..85aef6f --- /dev/null +++ b/SOURCES/xfsprogs-diff-since-alpha2.patch @@ -0,0 +1,5337 @@ +diff --git a/copy/xfs_copy.c b/copy/xfs_copy.c +index 9986fbf..9f2f99d 100644 +--- a/copy/xfs_copy.c ++++ b/copy/xfs_copy.c +@@ -684,6 +684,16 @@ main(int argc, char **argv) + sb = &mbuf.m_sb; + libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbp)); + ++ /* ++ * For now, V5 superblock filesystems are not supported without -d; ++ * we do not have the infrastructure yet to fix CRCs when a new UUID ++ * is generated. ++ */ ++ if (xfs_sb_version_hascrc(sb) && !duplicate) { ++ do_log(_("%s: Cannot yet copy V5 fs without '-d'\n"), progname); ++ exit(1); ++ } ++ + mp = libxfs_mount(&mbuf, sb, xargs.ddev, xargs.logdev, xargs.rtdev, 0); + if (mp == NULL) { + do_log(_("%s: %s filesystem failed to initialize\n" +@@ -957,7 +967,13 @@ main(int argc, char **argv) + ((char *)btree_buf.data + + pos - btree_buf.position); + +- ASSERT(be32_to_cpu(block->bb_magic) == XFS_ABTB_MAGIC); ++ if (be32_to_cpu(block->bb_magic) != ++ (xfs_sb_version_hascrc(&mp->m_sb) ? 
++ XFS_ABTB_CRC_MAGIC : XFS_ABTB_MAGIC)) { ++ do_log(_("Bad btree magic 0x%x\n"), ++ be32_to_cpu(block->bb_magic)); ++ exit(1); ++ } + + if (be16_to_cpu(block->bb_level) == 0) + break; +diff --git a/db/attr.c b/db/attr.c +index 740d564..caa154e 100644 +--- a/db/attr.c ++++ b/db/attr.c +@@ -170,7 +170,7 @@ attr3_leaf_entries_count( + struct xfs_attr3_leafblock *leaf = obj; + + ASSERT(startoff == 0); +- if (be16_to_cpu(leaf->hdr.info.hdr.magic) != XFS_ATTR_LEAF_MAGIC) ++ if (be16_to_cpu(leaf->hdr.info.hdr.magic) != XFS_ATTR3_LEAF_MAGIC) + return 0; + return be16_to_cpu(leaf->hdr.count); + } +diff --git a/db/bit.c b/db/bit.c +index ca57d31..e8adab3 100644 +--- a/db/bit.c ++++ b/db/bit.c +@@ -128,57 +128,41 @@ getbitval( + return rval; + } + ++/* ++ * The input data can be 8, 16, 32, and 64 sized numeric values ++ * aligned on a byte boundary, or odd sized numbers stored on odd ++ * aligned offset (for example the bmbt fields). ++ * ++ * The input data sent to this routine has been converted to big endian ++ * and has been adjusted in the array so that the first input bit is to ++ * be written in the first bit in the output. ++ * ++ * If the field length and the output buffer are byte aligned, then use ++ * memcpy from the input to the output, but if either entry is not byte ++ * aligned, then loop over the entire bit range reading the input value ++ * and set/clear the matching bit in the output. 
++ * ++ * example when ibuf is not multiple of a byte in length: ++ * ++ * ibuf: | BBBBBBBB | bbbxxxxx | ++ * \\\\\\\\--\\\\ ++ * obuf+bitoff: | xBBBBBBB | Bbbbxxxx | ++ * ++ */ + void + setbitval( +- void *obuf, /* buffer to write into */ +- int bitoff, /* bit offset of where to write */ +- int nbits, /* number of bits to write */ +- void *ibuf) /* source bits */ ++ void *obuf, /* start of buffer to write into */ ++ int bitoff, /* bit offset into the output buffer */ ++ int nbits, /* number of bits to write */ ++ void *ibuf) /* source bits */ + { +- char *in = (char *)ibuf; +- char *out = (char *)obuf; +- +- int bit; +- +-#if BYTE_ORDER == LITTLE_ENDIAN +- int big = 0; +-#else +- int big = 1; +-#endif +- +- /* only need to swap LE integers */ +- if (big || (nbits!=16 && nbits!=32 && nbits!=64) ) { +- /* We don't have type info, so we can only assume +- * that 2,4 & 8 byte values are integers. sigh. +- */ +- +- /* byte aligned ? */ +- if (bitoff%NBBY) { +- /* no - bit copy */ +- for (bit=0; bitsb_magicnum != XFS_SB_MAGIC) { + fprintf(stderr, _("%s: %s is not a valid XFS filesystem (unexpected SB magic number 0x%08x)\n"), + progname, fsdevice, sbp->sb_magicnum); +- if (!force) ++ if (!force) { ++ fprintf(stderr, _("Use -F to force a read attempt.\n")); + exit(EXIT_FAILURE); ++ } + } + + mp = libxfs_mount(&xmount, sbp, x.ddev, x.logdev, x.rtdev, +diff --git a/db/io.c b/db/io.c +index 123214d..9a787c8 100644 +--- a/db/io.c ++++ b/db/io.c +@@ -449,9 +449,7 @@ write_cur_bbs(void) + + + /* re-read buffer from disk */ +- ret = libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp, +- iocur_top->bbmap->b, iocur_top->bbmap->nmaps, +- 0); ++ ret = libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp, 0); + if (ret != 0) + dbprintf(_("read error: %s\n"), strerror(ret)); + } +@@ -523,10 +521,11 @@ set_cur( + } + + /* +- * keep the buffer even if the verifier says it is corrupted. ++ * Keep the buffer even if the verifier says it is corrupted. 
+ * We're a diagnostic tool, after all. + */ +- if (!bp || (bp->b_error && bp->b_error != EFSCORRUPTED)) ++ if (!bp || (bp->b_error && bp->b_error != EFSCORRUPTED && ++ bp->b_error != EFSBADCRC)) + return; + iocur_top->buf = bp->b_addr; + iocur_top->bp = bp; +diff --git a/db/io.h b/db/io.h +index 4f24c83..ad39bee 100644 +--- a/db/io.h ++++ b/db/io.h +@@ -41,6 +41,7 @@ typedef struct iocur { + int ino_crc_ok:1; + int ino_buf:1; + int dquot_buf:1; ++ int need_crc:1; + } iocur_t; + + #define DB_RING_ADD 1 /* add to ring on set_cur */ +@@ -66,6 +67,6 @@ static inline bool + iocur_crc_valid() + { + return (iocur_top->bp && +- iocur_top->bp->b_error != EFSCORRUPTED && ++ iocur_top->bp->b_error != EFSBADCRC && + (!iocur_top->ino_buf || iocur_top->ino_crc_ok)); + } +diff --git a/db/metadump.c b/db/metadump.c +index 117dc42..09bb85a 100644 +--- a/db/metadump.c ++++ b/db/metadump.c +@@ -145,6 +145,8 @@ print_progress(const char *fmt, ...) + * even if the dump is exactly aligned, the last index will be full of + * zeros. If the last index entry is non-zero, the dump is incomplete. + * Correspondingly, the last chunk will have a count < num_indicies. ++ * ++ * Return 0 for success, -errno for failure. + */ + + static int +@@ -156,49 +158,88 @@ write_index(void) + metablock->mb_count = cpu_to_be16(cur_index); + if (fwrite(metablock, (cur_index + 1) << BBSHIFT, 1, outf) != 1) { + print_warning("error writing to file: %s", strerror(errno)); +- return 0; ++ return -errno; + } + + memset(block_index, 0, num_indicies * sizeof(__be64)); + cur_index = 0; +- return 1; ++ return 0; ++} ++ ++/* ++ * Return 0 for success, -errno for failure. 
++ */ ++static int ++write_buf_segment( ++ char *data, ++ __int64_t off, ++ int len) ++{ ++ int i; ++ int ret; ++ ++ for (i = 0; i < len; i++, off++, data += BBSIZE) { ++ block_index[cur_index] = cpu_to_be64(off); ++ memcpy(&block_buffer[cur_index << BBSHIFT], data, BBSIZE); ++ if (++cur_index == num_indicies) { ++ ret = write_index(); ++ if (ret) ++ return -EIO; ++ } ++ } ++ return 0; + } + ++/* ++ * we want to preserve the state of the metadata in the dump - whether it is ++ * intact or corrupt, so even if the buffer has a verifier attached to it we ++ * don't want to run it prior to writing the buffer to the metadump image. ++ * ++ * The only reason for running the verifier is to recalculate the CRCs on a ++ * buffer that has been obfuscated. i.e. a buffer that metadump modified itself. ++ * In this case, we only run the verifier if the buffer was not corrupt to begin ++ * with so that we don't accidentally correct buffers with CRC or errors in them ++ * when we are obfuscating them. ++ */ + static int + write_buf( + iocur_t *buf) + { +- char *data; +- __int64_t off; ++ struct xfs_buf *bp = buf->bp; + int i; ++ int ret; + + /* + * Run the write verifier to recalculate the buffer CRCs and check +- * we are writing something valid to disk ++ * metadump didn't introduce a new corruption. Warn if the verifier ++ * failed, but still continue to dump it into the output file. 
+ */ +- if (buf->bp && buf->bp->b_ops) { +- buf->bp->b_error = 0; +- buf->bp->b_ops->verify_write(buf->bp); +- if (buf->bp->b_error) { +- fprintf(stderr, +- _("%s: write verifer failed on bno 0x%llx/0x%x\n"), +- __func__, (long long)buf->bp->b_bn, +- buf->bp->b_bcount); +- return buf->bp->b_error; ++ if (buf->need_crc && bp && bp->b_ops && !bp->b_error) { ++ bp->b_ops->verify_write(bp); ++ if (bp->b_error) { ++ print_warning( ++ "obfuscation corrupted block at bno 0x%llx/0x%x", ++ (long long)bp->b_bn, bp->b_bcount); + } + } + +- for (i = 0, off = buf->bb, data = buf->data; +- i < buf->blen; +- i++, off++, data += BBSIZE) { +- block_index[cur_index] = cpu_to_be64(off); +- memcpy(&block_buffer[cur_index << BBSHIFT], data, BBSIZE); +- if (++cur_index == num_indicies) { +- if (!write_index()) +- return 0; ++ /* handle discontiguous buffers */ ++ if (!buf->bbmap) { ++ ret = write_buf_segment(buf->data, buf->bb, buf->blen); ++ if (ret) ++ return ret; ++ } else { ++ int len = 0; ++ for (i = 0; i < buf->bbmap->nmaps; i++) { ++ ret = write_buf_segment(buf->data + BBTOB(len), ++ buf->bbmap->b[i].bm_bn, ++ buf->bbmap->b[i].bm_len); ++ if (ret) ++ return ret; ++ len += buf->bbmap->b[i].bm_len; + } + } +- return !seenint(); ++ return seenint() ? -EINTR : 0; + } + + +@@ -227,7 +268,7 @@ scan_btree( + rval = !stop_on_read_error; + goto pop_out; + } +- if (!write_buf(iocur_top)) ++ if (write_buf(iocur_top)) + goto pop_out; + + if (!(*func)(iocur_top->data, agno, agbno, level - 1, btype, arg)) +@@ -974,16 +1015,23 @@ obfuscate_sf_dir( + } + } + ++/* ++ * The pathname may not be null terminated. It may be terminated by the end of ++ * a buffer or inode literal area, and the start of the next region contains ++ * unknown data. Therefore, when we get to the last component of the symlink, we ++ * cannot assume that strlen() will give us the right result. Hence we need to ++ * track the remaining pathname length and use that instead. 
++ */ + static void + obfuscate_path_components( + char *buf, + __uint64_t len) + { +- uchar_t *comp; ++ uchar_t *comp = (uchar_t *)buf; ++ uchar_t *end = comp + len; + xfs_dahash_t hash; + +- comp = (uchar_t *)buf; +- while (comp < (uchar_t *)buf + len) { ++ while (comp < end) { + char *slash; + int namelen; + +@@ -991,7 +1039,7 @@ obfuscate_path_components( + slash = strchr((char *)comp, '/'); + if (!slash) { + /* last (or single) component */ +- namelen = strlen((char *)comp); ++ namelen = strnlen((char *)comp, len); + hash = libxfs_da_hashname(comp, namelen); + obfuscate_name(hash, namelen, comp); + break; +@@ -1000,11 +1048,13 @@ obfuscate_path_components( + /* handle leading or consecutive slashes */ + if (!namelen) { + comp++; ++ len--; + continue; + } + hash = libxfs_da_hashname(comp, namelen); + obfuscate_name(hash, namelen, comp); + comp += namelen + 1; ++ len -= namelen + 1; + } + } + +@@ -1080,24 +1130,11 @@ obfuscate_sf_attr( + } + } + +-/* +- * dir_data structure is used to track multi-fsblock dir2 blocks between extent +- * processing calls. +- */ +- +-static struct dir_data_s { +- int end_of_data; +- int block_index; +- int offset_to_entry; +- int bad_block; +-} dir_data; +- + static void +-obfuscate_dir_data_blocks( +- char *block, +- xfs_dfiloff_t offset, +- xfs_dfilblks_t count, +- int is_block_format) ++obfuscate_dir_data_block( ++ char *block, ++ xfs_dfiloff_t offset, ++ int is_block_format) + { + /* + * we have to rely on the fileoffset and signature of the block to +@@ -1105,133 +1142,105 @@ obfuscate_dir_data_blocks( + * for multi-fsblock dir blocks, if a name crosses an extent boundary, + * ignore it and continue. 
+ */ +- int c; +- int dir_offset; +- char *ptr; +- char *endptr; +- +- if (is_block_format && count != mp->m_dirblkfsbs) +- return; /* too complex to handle this rare case */ +- +- for (c = 0, endptr = block; c < count; c++) { +- +- if (dir_data.block_index == 0) { +- int wantmagic; +- struct xfs_dir2_data_hdr *datahdr; +- +- datahdr = (struct xfs_dir2_data_hdr *)block; +- +- if (offset % mp->m_dirblkfsbs != 0) +- return; /* corrupted, leave it alone */ +- +- dir_data.bad_block = 0; +- +- if (is_block_format) { +- xfs_dir2_leaf_entry_t *blp; +- xfs_dir2_block_tail_t *btp; +- +- btp = xfs_dir2_block_tail_p(mp, datahdr); +- blp = xfs_dir2_block_leaf_p(btp); +- if ((char *)blp > (char *)btp) +- blp = (xfs_dir2_leaf_entry_t *)btp; +- +- dir_data.end_of_data = (char *)blp - block; +- wantmagic = XFS_DIR2_BLOCK_MAGIC; +- } else { /* leaf/node format */ +- dir_data.end_of_data = mp->m_dirblkfsbs << +- mp->m_sb.sb_blocklog; +- wantmagic = XFS_DIR2_DATA_MAGIC; +- } +- dir_data.offset_to_entry = +- xfs_dir3_data_entry_offset(datahdr); ++ int dir_offset; ++ char *ptr; ++ char *endptr; ++ int end_of_data; ++ int wantmagic; ++ struct xfs_dir2_data_hdr *datahdr; ++ ++ datahdr = (struct xfs_dir2_data_hdr *)block; ++ ++ if (offset % mp->m_dirblkfsbs != 0) ++ return; /* corrupted, leave it alone */ ++ ++ if (is_block_format) { ++ xfs_dir2_leaf_entry_t *blp; ++ xfs_dir2_block_tail_t *btp; ++ ++ btp = xfs_dir2_block_tail_p(mp, datahdr); ++ blp = xfs_dir2_block_leaf_p(btp); ++ if ((char *)blp > (char *)btp) ++ blp = (xfs_dir2_leaf_entry_t *)btp; ++ ++ end_of_data = (char *)blp - block; ++ if (xfs_sb_version_hascrc(&mp->m_sb)) ++ wantmagic = XFS_DIR3_BLOCK_MAGIC; ++ else ++ wantmagic = XFS_DIR2_BLOCK_MAGIC; ++ } else { /* leaf/node format */ ++ end_of_data = mp->m_dirblkfsbs << mp->m_sb.sb_blocklog; ++ if (xfs_sb_version_hascrc(&mp->m_sb)) ++ wantmagic = XFS_DIR3_DATA_MAGIC; ++ else ++ wantmagic = XFS_DIR2_DATA_MAGIC; ++ } + +- if (be32_to_cpu(datahdr->magic) != wantmagic) { +- if 
(show_warnings) +- print_warning("invalid magic in dir " +- "inode %llu block %ld", +- (long long)cur_ino, +- (long)offset); +- dir_data.bad_block = 1; +- } +- } +- dir_data.block_index++; +- if (dir_data.block_index == mp->m_dirblkfsbs) +- dir_data.block_index = 0; ++ if (be32_to_cpu(datahdr->magic) != wantmagic) { ++ if (show_warnings) ++ print_warning( ++ "invalid magic in dir inode %llu block %ld", ++ (long long)cur_ino, (long)offset); ++ return; ++ } + +- if (dir_data.bad_block) +- continue; ++ dir_offset = xfs_dir3_data_entry_offset(datahdr); ++ ptr = block + dir_offset; ++ endptr = block + mp->m_sb.sb_blocksize; + +- dir_offset = (dir_data.block_index << mp->m_sb.sb_blocklog) + +- dir_data.offset_to_entry; +- +- ptr = endptr + dir_data.offset_to_entry; +- endptr += mp->m_sb.sb_blocksize; +- +- while (ptr < endptr && dir_offset < dir_data.end_of_data) { +- xfs_dir2_data_entry_t *dep; +- xfs_dir2_data_unused_t *dup; +- int length; +- +- dup = (xfs_dir2_data_unused_t *)ptr; +- +- if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { +- int length = be16_to_cpu(dup->length); +- if (dir_offset + length > dir_data.end_of_data || +- length == 0 || (length & +- (XFS_DIR2_DATA_ALIGN - 1))) { +- if (show_warnings) +- print_warning("invalid length " +- "for dir free space in " +- "inode %llu", +- (long long)cur_ino); +- dir_data.bad_block = 1; +- break; +- } +- if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != +- dir_offset) { +- dir_data.bad_block = 1; +- break; +- } +- dir_offset += length; +- ptr += length; +- if (dir_offset >= dir_data.end_of_data || +- ptr >= endptr) +- break; +- } ++ while (ptr < endptr && dir_offset < end_of_data) { ++ xfs_dir2_data_entry_t *dep; ++ xfs_dir2_data_unused_t *dup; ++ int length; + +- dep = (xfs_dir2_data_entry_t *)ptr; +- length = xfs_dir3_data_entsize(mp, dep->namelen); ++ dup = (xfs_dir2_data_unused_t *)ptr; + +- if (dir_offset + length > dir_data.end_of_data || +- ptr + length > endptr) { ++ if (be16_to_cpu(dup->freetag) 
== XFS_DIR2_DATA_FREE_TAG) { ++ int length = be16_to_cpu(dup->length); ++ if (dir_offset + length > end_of_data || ++ !length || (length & (XFS_DIR2_DATA_ALIGN - 1))) { + if (show_warnings) +- print_warning("invalid length for " +- "dir entry name in inode %llu", ++ print_warning( ++ "invalid length for dir free space in inode %llu", + (long long)cur_ino); +- break; ++ return; + } +- if (be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) != +- dir_offset) { +- dir_data.bad_block = 1; +- break; +- } +- generate_obfuscated_name(be64_to_cpu(dep->inumber), +- dep->namelen, &dep->name[0]); ++ if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != ++ dir_offset) ++ return; + dir_offset += length; + ptr += length; ++ if (dir_offset >= end_of_data || ptr >= endptr) ++ return; ++ } ++ ++ dep = (xfs_dir2_data_entry_t *)ptr; ++ length = xfs_dir3_data_entsize(mp, dep->namelen); ++ ++ if (dir_offset + length > end_of_data || ++ ptr + length > endptr) { ++ if (show_warnings) ++ print_warning( ++ "invalid length for dir entry name in inode %llu", ++ (long long)cur_ino); ++ return; + } +- dir_data.offset_to_entry = dir_offset & +- (mp->m_sb.sb_blocksize - 1); ++ if (be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) != ++ dir_offset) ++ return; ++ generate_obfuscated_name(be64_to_cpu(dep->inumber), ++ dep->namelen, &dep->name[0]); ++ dir_offset += length; ++ ptr += length; + } + } + + static void +-obfuscate_symlink_blocks( +- char *block, +- xfs_dfilblks_t count) ++obfuscate_symlink_block( ++ char *block) + { +- count <<= mp->m_sb.sb_blocklog; +- obfuscate_path_components(block, count); ++ /* XXX: need to handle CRC headers */ ++ obfuscate_path_components(block, mp->m_sb.sb_blocksize); + } + + #define MAX_REMOTE_VALS 4095 +@@ -1252,86 +1261,227 @@ add_remote_vals( + blockidx++; + length -= XFS_LBSIZE(mp); + } ++ ++ if (attr_data.remote_val_count >= MAX_REMOTE_VALS) { ++ print_warning( ++"Overflowed attr obfuscation array. 
No longer obfuscating remote attrs."); ++ } + } + + static void +-obfuscate_attr_blocks( ++obfuscate_attr_block( + char *block, +- xfs_dfiloff_t offset, +- xfs_dfilblks_t count) ++ xfs_dfiloff_t offset) + { + xfs_attr_leafblock_t *leaf; +- int c; + int i; + int nentries; + xfs_attr_leaf_entry_t *entry; + xfs_attr_leaf_name_local_t *local; + xfs_attr_leaf_name_remote_t *remote; + +- for (c = 0; c < count; c++, offset++, block += XFS_LBSIZE(mp)) { ++ leaf = (xfs_attr_leafblock_t *)block; + +- leaf = (xfs_attr_leafblock_t *)block; +- +- if (be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC) { +- for (i = 0; i < attr_data.remote_val_count; i++) { +- if (attr_data.remote_vals[i] == offset) +- memset(block, 0, XFS_LBSIZE(mp)); +- } +- continue; ++ if (be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC) { ++ for (i = 0; i < attr_data.remote_val_count; i++) { ++ /* XXX: need to handle CRC headers */ ++ if (attr_data.remote_vals[i] == offset) ++ memset(block, 0, XFS_LBSIZE(mp)); + } ++ return; ++ } + +- nentries = be16_to_cpu(leaf->hdr.count); +- if (nentries * sizeof(xfs_attr_leaf_entry_t) + +- sizeof(xfs_attr_leaf_hdr_t) > XFS_LBSIZE(mp)) { ++ nentries = be16_to_cpu(leaf->hdr.count); ++ if (nentries * sizeof(xfs_attr_leaf_entry_t) + ++ sizeof(xfs_attr_leaf_hdr_t) > XFS_LBSIZE(mp)) { ++ if (show_warnings) ++ print_warning("invalid attr count in inode %llu", ++ (long long)cur_ino); ++ return; ++ } ++ ++ for (i = 0, entry = &leaf->entries[0]; i < nentries; i++, entry++) { ++ if (be16_to_cpu(entry->nameidx) > XFS_LBSIZE(mp)) { + if (show_warnings) +- print_warning("invalid attr count in inode %llu", ++ print_warning( ++ "invalid attr nameidx in inode %llu", + (long long)cur_ino); +- continue; ++ break; + } +- +- for (i = 0, entry = &leaf->entries[0]; i < nentries; +- i++, entry++) { +- if (be16_to_cpu(entry->nameidx) > XFS_LBSIZE(mp)) { ++ if (entry->flags & XFS_ATTR_LOCAL) { ++ local = xfs_attr3_leaf_name_local(leaf, i); ++ if (local->namelen == 0) { + if 
(show_warnings) +- print_warning("invalid attr nameidx " +- "in inode %llu", +- (long long)cur_ino); ++ print_warning( ++ "zero length for attr name in inode %llu", ++ (long long)cur_ino); + break; + } +- if (entry->flags & XFS_ATTR_LOCAL) { +- local = xfs_attr3_leaf_name_local(leaf, i); +- if (local->namelen == 0) { +- if (show_warnings) +- print_warning("zero length for " +- "attr name in inode %llu", +- (long long)cur_ino); +- break; +- } +- generate_obfuscated_name(0, local->namelen, +- &local->nameval[0]); +- memset(&local->nameval[local->namelen], 0, +- be16_to_cpu(local->valuelen)); +- } else { +- remote = xfs_attr3_leaf_name_remote(leaf, i); +- if (remote->namelen == 0 || +- remote->valueblk == 0) { +- if (show_warnings) +- print_warning("invalid attr " +- "entry in inode %llu", +- (long long)cur_ino); +- break; +- } +- generate_obfuscated_name(0, remote->namelen, +- &remote->name[0]); +- add_remote_vals(be32_to_cpu(remote->valueblk), ++ generate_obfuscated_name(0, local->namelen, ++ &local->nameval[0]); ++ memset(&local->nameval[local->namelen], 0, ++ be16_to_cpu(local->valuelen)); ++ } else { ++ remote = xfs_attr3_leaf_name_remote(leaf, i); ++ if (remote->namelen == 0 || remote->valueblk == 0) { ++ if (show_warnings) ++ print_warning( ++ "invalid attr entry in inode %llu", ++ (long long)cur_ino); ++ break; ++ } ++ generate_obfuscated_name(0, remote->namelen, ++ &remote->name[0]); ++ add_remote_vals(be32_to_cpu(remote->valueblk), + be32_to_cpu(remote->valuelen)); ++ } ++ } ++} ++ ++static int ++process_single_fsb_objects( ++ xfs_dfiloff_t o, ++ xfs_dfsbno_t s, ++ xfs_dfilblks_t c, ++ typnm_t btype, ++ xfs_dfiloff_t last) ++{ ++ char *dp; ++ int ret = 0; ++ int i; ++ ++ for (i = 0; i < c; i++) { ++ push_cur(); ++ set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, s), blkbb, ++ DB_RING_IGN, NULL); ++ ++ if (!iocur_top->data) { ++ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, s); ++ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, s); ++ ++ print_warning("cannot read %s 
block %u/%u (%llu)", ++ typtab[btype].name, agno, agbno, s); ++ if (stop_on_read_error) ++ ret = -EIO; ++ goto out_pop; ++ ++ } ++ ++ if (dont_obfuscate) ++ goto write; ++ ++ dp = iocur_top->data; ++ switch (btype) { ++ case TYP_DIR2: ++ if (o >= mp->m_dirleafblk) ++ break; ++ ++ obfuscate_dir_data_block(dp, o, ++ last == mp->m_dirblkfsbs); ++ iocur_top->need_crc = 1; ++ break; ++ case TYP_SYMLINK: ++ obfuscate_symlink_block(dp); ++ iocur_top->need_crc = 1; ++ break; ++ case TYP_ATTR: ++ obfuscate_attr_block(dp, o); ++ iocur_top->need_crc = 1; ++ break; ++ default: ++ break; ++ } ++ ++write: ++ ret = write_buf(iocur_top); ++out_pop: ++ pop_cur(); ++ if (ret) ++ break; ++ o++; ++ s++; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Static map to aggregate multiple extents into a single directory block. ++ */ ++static struct bbmap mfsb_map; ++static int mfsb_length; ++ ++static int ++process_multi_fsb_objects( ++ xfs_dfiloff_t o, ++ xfs_dfsbno_t s, ++ xfs_dfilblks_t c, ++ typnm_t btype, ++ xfs_dfiloff_t last) ++{ ++ int ret = 0; ++ ++ switch (btype) { ++ case TYP_DIR2: ++ break; ++ default: ++ print_warning("bad type for multi-fsb object %d", btype); ++ return -EINVAL; ++ } ++ ++ while (c > 0) { ++ unsigned int bm_len; ++ ++ if (mfsb_length + c >= mp->m_dirblkfsbs) { ++ bm_len = mp->m_dirblkfsbs - mfsb_length; ++ mfsb_length = 0; ++ } else { ++ mfsb_length += c; ++ bm_len = c; ++ } ++ ++ mfsb_map.b[mfsb_map.nmaps].bm_bn = XFS_FSB_TO_DADDR(mp, s); ++ mfsb_map.b[mfsb_map.nmaps].bm_len = XFS_FSB_TO_BB(mp, bm_len); ++ mfsb_map.nmaps++; ++ ++ if (mfsb_length == 0) { ++ push_cur(); ++ set_cur(&typtab[btype], 0, 0, DB_RING_IGN, &mfsb_map); ++ if (!iocur_top->data) { ++ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, s); ++ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, s); ++ ++ print_warning("cannot read %s block %u/%u (%llu)", ++ typtab[btype].name, agno, agbno, s); ++ if (stop_on_read_error) ++ ret = -1; ++ goto out_pop; ++ ++ } ++ ++ if (dont_obfuscate || o >= mp->m_dirleafblk) { ++ ret 
= write_buf(iocur_top); ++ goto out_pop; + } ++ ++ obfuscate_dir_data_block(iocur_top->data, o, ++ last == mp->m_dirblkfsbs); ++ iocur_top->need_crc = 1; ++ ret = write_buf(iocur_top); ++out_pop: ++ pop_cur(); ++ mfsb_map.nmaps = 0; ++ if (ret) ++ break; + } ++ c -= bm_len; ++ s += bm_len; + } ++ ++ return ret; + } + + /* inode copy routines */ +- + static int + process_bmbt_reclist( + xfs_bmbt_rec_t *rp, +@@ -1346,6 +1496,7 @@ process_bmbt_reclist( + xfs_dfiloff_t last; + xfs_agnumber_t agno; + xfs_agblock_t agbno; ++ int error; + + if (btype == TYP_DATA) + return 1; +@@ -1407,44 +1558,14 @@ process_bmbt_reclist( + break; + } + +- push_cur(); +- set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, s), c * blkbb, +- DB_RING_IGN, NULL); +- if (iocur_top->data == NULL) { +- print_warning("cannot read %s block %u/%u (%llu)", +- typtab[btype].name, agno, agbno, s); +- if (stop_on_read_error) { +- pop_cur(); +- return 0; +- } ++ /* multi-extent blocks require special handling */ ++ if (btype != TYP_DIR2 || mp->m_dirblkfsbs == 1) { ++ error = process_single_fsb_objects(o, s, c, btype, last); + } else { +- if (!dont_obfuscate) +- switch (btype) { +- case TYP_DIR2: +- if (o < mp->m_dirleafblk) +- obfuscate_dir_data_blocks( +- iocur_top->data, o, c, +- last == mp->m_dirblkfsbs); +- break; +- +- case TYP_SYMLINK: +- obfuscate_symlink_blocks( +- iocur_top->data, c); +- break; +- +- case TYP_ATTR: +- obfuscate_attr_blocks(iocur_top->data, +- o, c); +- break; +- +- default: ; +- } +- if (!write_buf(iocur_top)) { +- pop_cur(); +- return 0; +- } ++ error = process_multi_fsb_objects(o, s, c, btype, last); + } +- pop_cur(); ++ if (error) ++ return 0; + } + + return 1; +@@ -1626,6 +1747,13 @@ process_inode_data( + return 1; + } + ++/* ++ * when we process the inode, we may change the data in the data and/or ++ * attribute fork if they are in short form and we are obfuscating names. 
++ * In this case we need to recalculate the CRC of the inode, but we should ++ * only do that if the CRC in the inode is good to begin with. If the crc ++ * is not ok, we just leave it alone. ++ */ + static int + process_inode( + xfs_agnumber_t agno, +@@ -1633,18 +1761,30 @@ process_inode( + xfs_dinode_t *dip) + { + int success; ++ bool crc_was_ok = false; /* no recalc by default */ ++ bool need_new_crc = false; + + success = 1; + cur_ino = XFS_AGINO_TO_INO(mp, agno, agino); + ++ /* we only care about crc recalculation if we are obfuscating names. */ ++ if (!dont_obfuscate) { ++ crc_was_ok = xfs_verify_cksum((char *)dip, ++ mp->m_sb.sb_inodesize, ++ offsetof(struct xfs_dinode, di_crc)); ++ } ++ + /* copy appropriate data fork metadata */ + switch (be16_to_cpu(dip->di_mode) & S_IFMT) { + case S_IFDIR: +- memset(&dir_data, 0, sizeof(dir_data)); + success = process_inode_data(dip, TYP_DIR2); ++ if (dip->di_format == XFS_DINODE_FMT_LOCAL) ++ need_new_crc = 1; + break; + case S_IFLNK: + success = process_inode_data(dip, TYP_SYMLINK); ++ if (dip->di_format == XFS_DINODE_FMT_LOCAL) ++ need_new_crc = 1; + break; + case S_IFREG: + success = process_inode_data(dip, TYP_DATA); +@@ -1659,6 +1799,7 @@ process_inode( + attr_data.remote_val_count = 0; + switch (dip->di_aformat) { + case XFS_DINODE_FMT_LOCAL: ++ need_new_crc = 1; + if (!dont_obfuscate) + obfuscate_sf_attr(dip); + break; +@@ -1673,6 +1814,9 @@ process_inode( + } + nametable_clear(); + } ++ ++ if (crc_was_ok && need_new_crc) ++ xfs_dinode_calc_crc(mp, dip); + return success; + } + +@@ -1743,12 +1887,9 @@ copy_inode_chunk( + + if (!process_inode(agno, agino + i, dip)) + goto pop_out; +- +- /* calculate the new CRC for the inode */ +- xfs_dinode_calc_crc(mp, dip); + } + skip_processing: +- if (!write_buf(iocur_top)) ++ if (write_buf(iocur_top)) + goto pop_out; + + inodes_copied += XFS_INODES_PER_CHUNK; +@@ -1866,7 +2007,7 @@ scan_ag( + if (stop_on_read_error) + goto pop_out; + } else { +- if (!write_buf(iocur_top)) 
++ if (write_buf(iocur_top)) + goto pop_out; + } + +@@ -1881,7 +2022,7 @@ scan_ag( + if (stop_on_read_error) + goto pop_out; + } else { +- if (!write_buf(iocur_top)) ++ if (write_buf(iocur_top)) + goto pop_out; + } + +@@ -1896,7 +2037,7 @@ scan_ag( + if (stop_on_read_error) + goto pop_out; + } else { +- if (!write_buf(iocur_top)) ++ if (write_buf(iocur_top)) + goto pop_out; + } + +@@ -1910,7 +2051,7 @@ scan_ag( + if (stop_on_read_error) + goto pop_out; + } else { +- if (!write_buf(iocur_top)) ++ if (write_buf(iocur_top)) + goto pop_out; + } + +@@ -2015,7 +2156,7 @@ copy_log(void) + print_warning("cannot read log"); + return !stop_on_read_error; + } +- return write_buf(iocur_top); ++ return !write_buf(iocur_top); + } + + static int +@@ -2121,7 +2262,7 @@ metadump_f( + + /* write the remaining index */ + if (!exitcode) +- exitcode = !write_index(); ++ exitcode = write_index() < 0; + + if (progress_since_warning) + fputc('\n', (outf == stdout) ? stderr : stdout); +diff --git a/db/write.c b/db/write.c +index 091ddb3..7b34fc0 100644 +--- a/db/write.c ++++ b/db/write.c +@@ -439,55 +439,78 @@ convert_oct( + + #define NYBBLE(x) (isdigit(x)?(x-'0'):(tolower(x)-'a'+0xa)) + ++/* ++ * convert_arg allows input in the following forms: ++ * ++ * - A string ("ABTB") whose ASCII value is placed in an array in the order ++ * matching the input. ++ * ++ * - An even number of hex numbers. If the length is greater than 64 bits, ++ * then the output is an array of bytes whose top nibble is the first hex ++ * digit in the input, the lower nibble is the second hex digit in the ++ * input. UUID entries are entered in this manner. ++ * ++ * - A decimal or hexadecimal integer to be used with setbitval(). ++ * ++ * Numbers that are passed to setbitval() need to be in big endian format and ++ * are adjusted in the buffer so that the first input bit is to be written to ++ * the first bit in the output. 
++ */ + static char * + convert_arg( +- char *arg, +- int bit_length) ++ char *arg, ++ int bit_length) + { +- int i; +- static char *buf = NULL; +- char *rbuf; +- long long *value; +- int alloc_size; +- char *ostr; +- int octval, ret; ++ int i; ++ int alloc_size; ++ int octval; ++ int offset; ++ int ret; ++ static char *buf = NULL; ++ char *endp; ++ char *rbuf; ++ char *ostr; ++ __u64 *value; ++ __u64 val = 0; + + if (bit_length <= 64) + alloc_size = 8; + else +- alloc_size = (bit_length+7)/8; ++ alloc_size = (bit_length + 7) / 8; + + buf = xrealloc(buf, alloc_size); + memset(buf, 0, alloc_size); +- value = (long long *)buf; ++ value = (__u64 *)buf; + rbuf = buf; + + if (*arg == '\"') { +- /* handle strings */ ++ /* input a string and output ASCII array of characters */ + + /* zap closing quote if there is one */ +- if ((ostr = strrchr(arg+1, '\"')) != NULL) ++ ostr = strrchr(arg + 1, '\"'); ++ if (ostr) + *ostr = '\0'; + +- ostr = arg+1; ++ ostr = arg + 1; + for (i = 0; i < alloc_size; i++) { + if (!*ostr) + break; + +- /* do octal */ ++ /* do octal conversion */ + if (*ostr == '\\') { +- if (*(ostr+1) >= '0' || *(ostr+1) <= '7') { +- ret = convert_oct(ostr+1, &octval); ++ if (*(ostr + 1) >= '0' || *(ostr + 1) <= '7') { ++ ret = convert_oct(ostr + 1, &octval); + *rbuf++ = octval; +- ostr += ret+1; ++ ostr += ret + 1; + continue; + } + } + *rbuf++ = *ostr++; + } +- + return buf; +- } else if (arg[0] == '#' || ((arg[0] != '-') && strchr(arg,'-'))) { ++ } ++ ++ if (arg[0] == '#' || ((arg[0] != '-') && strchr(arg,'-'))) { + /* + * handle hex blocks ie + * #00112233445566778899aabbccddeeff +@@ -496,48 +519,79 @@ convert_arg( + * + * (but if it starts with "-" assume it's just an integer) + */ +- int bytes=bit_length/8; ++ int bytes = bit_length / NBBY; ++ ++ /* is this an array of hex numbers? 
*/ ++ if (bit_length % NBBY) ++ return NULL; + + /* skip leading hash */ +- if (*arg=='#') arg++; ++ if (*arg == '#') ++ arg++; + + while (*arg && bytes--) { +- /* skip hypens */ +- while (*arg=='-') arg++; +- +- /* get first nybble */ +- if (!isxdigit((int)*arg)) return NULL; +- *rbuf=NYBBLE((int)*arg)<<4; +- arg++; +- +- /* skip more hyphens */ +- while (*arg=='-') arg++; +- +- /* get second nybble */ +- if (!isxdigit((int)*arg)) return NULL; +- *rbuf++|=NYBBLE((int)*arg); +- arg++; ++ /* skip hyphens */ ++ while (*arg == '-') ++ arg++; ++ ++ /* get first nybble */ ++ if (!isxdigit((int)*arg)) ++ return NULL; ++ *rbuf = NYBBLE((int)*arg) << 4; ++ arg++; ++ ++ /* skip more hyphens */ ++ while (*arg == '-') ++ arg++; ++ ++ /* get second nybble */ ++ if (!isxdigit((int)*arg)) ++ return NULL; ++ *rbuf++ |= NYBBLE((int)*arg); ++ arg++; + } +- if (bytes<0&&*arg) return NULL; ++ if (bytes < 0 && *arg) ++ return NULL; ++ + return buf; +- } else { +- /* +- * handle integers +- */ +- *value = strtoll(arg, NULL, 0); +- +-#if __BYTE_ORDER == BIG_ENDIAN +- /* hackery for big endian */ +- if (bit_length <= 8) { +- rbuf += 7; +- } else if (bit_length <= 16) { +- rbuf += 6; +- } else if (bit_length <= 32) { +- rbuf += 4; +- } +-#endif +- return rbuf; + } ++ ++ /* handle decimal / hexadecimal integers */ ++ val = strtoll(arg, &endp, 0); ++ /* return if not a clean number */ ++ if (*endp != '\0') ++ return NULL; ++ ++ /* Does the value fit into the range of the destination bitfield? 
*/ ++ if ((val >> bit_length) > 0) ++ return NULL; ++ /* ++ * If the length of the field is not a multiple of a byte, push ++ * the bits up in the field, so the most significant field bit is ++ * the most significant bit in the byte: ++ * ++ * before: ++ * val |----|----|----|----|----|--MM|mmmm|llll| ++ * after ++ * val |----|----|----|----|----|MMmm|mmll|ll00| ++ */ ++ offset = bit_length % NBBY; ++ if (offset) ++ val <<= (NBBY - offset); ++ ++ /* ++ * convert to big endian and copy into the array ++ * rbuf |----|----|----|----|----|MMmm|mmll|ll00| ++ */ ++ *value = cpu_to_be64(val); ++ ++ /* ++ * Align the array to point to the field in the array. ++ * rbuf = |MMmm|mmll|ll00| ++ */ ++ offset = sizeof(__be64) - 1 - ((bit_length - 1) / sizeof(__be64)); ++ rbuf += offset; ++ return rbuf; + } + + +@@ -550,9 +604,9 @@ write_struct( + { + const ftattr_t *fa; + flist_t *fl; +- flist_t *sfl; +- int bit_length; +- char *buf; ++ flist_t *sfl; ++ int bit_length; ++ char *buf; + int parentoffset; + + if (argc != 2) { +diff --git a/db/xfs_metadump.sh b/db/xfs_metadump.sh +index 28b04b8..a95d5a5 100755 +--- a/db/xfs_metadump.sh ++++ b/db/xfs_metadump.sh +@@ -5,9 +5,9 @@ + + OPTS=" " + DBOPTS=" " +-USAGE="Usage: xfs_metadump [-efogwV] [-m max_extents] [-l logdev] source target" ++USAGE="Usage: xfs_metadump [-efFogwV] [-m max_extents] [-l logdev] source target" + +-while getopts "efgl:m:owV" c ++while getopts "efgl:m:owFV" c + do + case $c in + e) OPTS=$OPTS"-e ";; +@@ -17,6 +17,7 @@ do + w) OPTS=$OPTS"-w ";; + f) DBOPTS=$DBOPTS" -f";; + l) DBOPTS=$DBOPTS" -l "$OPTARG" ";; ++ F) DBOPTS=$DBOPTS" -F";; + V) xfs_db -p xfs_metadump -V + status=$? + exit $status +@@ -29,7 +30,7 @@ done + set -- extra $@ + shift $OPTIND + case $# in +- 2) xfs_db$DBOPTS -F -i -p xfs_metadump -c "metadump$OPTS $2" $1 ++ 2) xfs_db$DBOPTS -i -p xfs_metadump -c "metadump$OPTS $2" $1 + status=$? 
+ ;; + *) echo $USAGE 1>&2 +diff --git a/growfs/xfs_growfs.c b/growfs/xfs_growfs.c +index 2df68fb..fb7eda8 100644 +--- a/growfs/xfs_growfs.c ++++ b/growfs/xfs_growfs.c +@@ -189,7 +189,7 @@ main(int argc, char **argv) + usage(); + if (iflag && xflag) + usage(); +- if (dflag + lflag + rflag == 0) ++ if (dflag + lflag + rflag + mflag == 0) + aflag = 1; + + fs_table_initialise(0, NULL, 0, NULL); +@@ -305,12 +305,15 @@ main(int argc, char **argv) + drsize -= (drsize % 2); + + error = 0; +- if (dflag | aflag) { ++ ++ if (dflag | mflag | aflag) { + xfs_growfs_data_t in; + + if (!mflag) + maxpct = geo.imaxpct; +- if (!dsize) ++ if (!dflag && !aflag) /* Only mflag, no data size change */ ++ dsize = geo.datablocks; ++ else if (!dsize) + dsize = ddsize / (geo.blocksize / BBSIZE); + else if (dsize > ddsize / (geo.blocksize / BBSIZE)) { + fprintf(stderr, _( +diff --git a/include/cache.h b/include/cache.h +index 76cb234..0a84c69 100644 +--- a/include/cache.h ++++ b/include/cache.h +@@ -66,7 +66,8 @@ typedef void (*cache_walk_t)(struct cache_node *); + typedef struct cache_node * (*cache_node_alloc_t)(cache_key_t); + typedef void (*cache_node_flush_t)(struct cache_node *); + typedef void (*cache_node_relse_t)(struct cache_node *); +-typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int); ++typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int, ++ unsigned int); + typedef int (*cache_node_compare_t)(struct cache_node *, cache_key_t); + typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *); + +@@ -112,6 +113,7 @@ struct cache { + cache_node_compare_t compare; /* comparison routine */ + cache_bulk_relse_t bulkrelse; /* bulk release routine */ + unsigned int c_hashsize; /* hash bucket count */ ++ unsigned int c_hashshift; /* hash key shift */ + struct cache_hash *c_hash; /* hash table buckets */ + struct cache_mru c_mrus[CACHE_MAX_PRIORITY + 1]; + unsigned long long c_misses; /* cache misses */ +diff --git a/include/darwin.h 
b/include/darwin.h +index 97b8990..95f865b 100644 +--- a/include/darwin.h ++++ b/include/darwin.h +@@ -150,6 +150,7 @@ typedef unsigned char uchar_t; + + #define ENOATTR 989 /* Attribute not found */ + #define EFSCORRUPTED 990 /* Filesystem is corrupted */ ++#define EFSBADCRC 991 /* Bad CRC detected */ + #define constpp char * const * + + #define HAVE_FID 1 +diff --git a/include/freebsd.h b/include/freebsd.h +index 2e1ae49..b51688b 100644 +--- a/include/freebsd.h ++++ b/include/freebsd.h +@@ -45,6 +45,7 @@ + #define constpp char * const * + + #define EFSCORRUPTED 990 /* Filesystem is corrupted */ ++#define EFSBADCRC 991 /* Bad CRC detected */ + + typedef off_t xfs_off_t; + typedef off_t off64_t; +diff --git a/include/gnukfreebsd.h b/include/gnukfreebsd.h +index 1ec291f..2140acd 100644 +--- a/include/gnukfreebsd.h ++++ b/include/gnukfreebsd.h +@@ -36,6 +36,7 @@ + #define constpp char * const * + + #define EFSCORRUPTED 990 /* Filesystem is corrupted */ ++#define EFSBADCRC 991 /* Bad CRC detected */ + + typedef off_t xfs_off_t; + typedef __uint64_t xfs_ino_t; +diff --git a/include/irix.h b/include/irix.h +index a450684..5040451 100644 +--- a/include/irix.h ++++ b/include/irix.h +@@ -52,6 +52,8 @@ typedef char* xfs_caddr_t; + #define xfs_flock64 flock64 + #define xfs_flock64_t struct flock64 + ++#define EFSBADCRC 991 /* Bad CRC detected */ ++ + typedef struct xfs_error_injection { + __int32_t fd; + __int32_t errtag; +diff --git a/include/libxfs.h b/include/libxfs.h +index 4bf331c..6bc6c94 100644 +--- a/include/libxfs.h ++++ b/include/libxfs.h +@@ -144,6 +144,7 @@ extern void libxfs_device_close (dev_t); + extern int libxfs_device_alignment (void); + extern void libxfs_report(FILE *); + extern void platform_findsizes(char *path, int fd, long long *sz, int *bsz); ++extern int platform_nproc(void); + + /* check or write log footer: specify device, log size in blocks & uuid */ + typedef xfs_caddr_t (libxfs_get_block_t)(xfs_caddr_t, int, void *); +@@ -364,7 +365,7 @@ enum 
xfs_buf_flags_t { /* b_flags bits */ + #define XFS_BUF_PRIORITY(bp) (cache_node_get_priority( \ + (struct cache_node *)(bp))) + #define xfs_buf_set_ref(bp,ref) ((void) 0) +-#define xfs_buf_ioerror(bp,err) (bp)->b_error = (err); ++#define xfs_buf_ioerror(bp,err) ((bp)->b_error = (err)) + + #define xfs_daddr_to_agno(mp,d) \ + ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) +@@ -392,9 +393,9 @@ extern struct cache_operations libxfs_bcache_operations; + #define libxfs_getbuf(dev, daddr, len) \ + libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \ + (dev), (daddr), (len)) +-#define libxfs_getbuf_map(dev, map, nmaps) \ ++#define libxfs_getbuf_map(dev, map, nmaps, flags) \ + libxfs_trace_getbuf_map(__FUNCTION__, __FILE__, __LINE__, \ +- (dev), (map), (nmaps)) ++ (dev), (map), (nmaps), (flags)) + #define libxfs_getbuf_flags(dev, daddr, len, flags) \ + libxfs_trace_getbuf_flags(__FUNCTION__, __FILE__, __LINE__, \ + (dev), (daddr), (len), (flags)) +@@ -412,7 +413,7 @@ extern int libxfs_trace_writebuf(const char *, const char *, int, + extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int, + struct xfs_buftarg *, xfs_daddr_t, int); + extern xfs_buf_t *libxfs_trace_getbuf_map(const char *, const char *, int, +- struct xfs_buftarg *, struct xfs_buf_map *, int); ++ struct xfs_buftarg *, struct xfs_buf_map *, int, int); + extern xfs_buf_t *libxfs_trace_getbuf_flags(const char *, const char *, int, + struct xfs_buftarg *, xfs_daddr_t, int, unsigned int); + extern void libxfs_trace_putbuf (const char *, const char *, int, +@@ -427,7 +428,7 @@ extern xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, + extern int libxfs_writebuf(xfs_buf_t *, int); + extern xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int); + extern xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, +- struct xfs_buf_map *, int); ++ struct xfs_buf_map *, int, int); + extern xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, + 
int, unsigned int); + extern void libxfs_putbuf (xfs_buf_t *); +@@ -448,8 +449,7 @@ extern void libxfs_putbufr(xfs_buf_t *); + extern int libxfs_writebuf_int(xfs_buf_t *, int); + extern int libxfs_writebufr(struct xfs_buf *); + extern int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, xfs_buf_t *, int, int); +-extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, +- struct xfs_buf_map *, int, int); ++extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, int); + + extern int libxfs_bhash_size; + +@@ -779,6 +779,20 @@ extern uint32_t crc32c_le(uint32_t crc, unsigned char const *p, size_t len); + + #include + ++static inline int ++xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset) ++{ ++ return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), ++ cksum_offset); ++} ++ ++static inline void ++xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) ++{ ++ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), ++ cksum_offset); ++} ++ + #define xfs_notice(mp,fmt,args...) cmn_err(CE_NOTE,fmt, ## args) + #define xfs_warn(mp,fmt,args...) cmn_err(CE_WARN,fmt, ## args) + #define xfs_alert(mp,fmt,args...) 
cmn_err(CE_ALERT,fmt, ## args) +diff --git a/include/linux.h b/include/linux.h +index 502fd1f..5586290 100644 +--- a/include/linux.h ++++ b/include/linux.h +@@ -136,6 +136,7 @@ platform_discard_blocks(int fd, uint64_t start, uint64_t len) + + #define ENOATTR ENODATA /* Attribute not found */ + #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ ++#define EFSBADCRC EBADMSG /* Bad CRC detected */ + + typedef loff_t xfs_off_t; + typedef __uint64_t xfs_ino_t; +diff --git a/include/xfs_ag.h b/include/xfs_ag.h +index 3fc1098..0fdd410 100644 +--- a/include/xfs_ag.h ++++ b/include/xfs_ag.h +@@ -89,6 +89,8 @@ typedef struct xfs_agf { + /* structure must be padded to 64 bit alignment */ + } xfs_agf_t; + ++#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc) ++ + #define XFS_AGF_MAGICNUM 0x00000001 + #define XFS_AGF_VERSIONNUM 0x00000002 + #define XFS_AGF_SEQNO 0x00000004 +@@ -167,6 +169,8 @@ typedef struct xfs_agi { + /* structure must be padded to 64 bit alignment */ + } xfs_agi_t; + ++#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) ++ + #define XFS_AGI_MAGICNUM 0x00000001 + #define XFS_AGI_VERSIONNUM 0x00000002 + #define XFS_AGI_SEQNO 0x00000004 +@@ -222,6 +226,8 @@ typedef struct xfs_agfl { + __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ + } xfs_agfl_t; + ++#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) ++ + /* + * tags for inode radix tree + */ +diff --git a/include/xfs_dinode.h b/include/xfs_dinode.h +index e5869b5..623bbe8 100644 +--- a/include/xfs_dinode.h ++++ b/include/xfs_dinode.h +@@ -89,6 +89,8 @@ typedef struct xfs_dinode { + /* structure must be padded to 64 bit alignment */ + } xfs_dinode_t; + ++#define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) ++ + #define DI_MAX_FLUSH 0xffff + + /* +diff --git a/include/xfs_dir2.h b/include/xfs_dir2.h +index 9910401..3900130 100644 +--- a/include/xfs_dir2.h ++++ b/include/xfs_dir2.h +@@ -57,6 +57,9 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, 
+ extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, + struct xfs_name *name, uint resblks); + ++#define S_SHIFT 12 ++extern const unsigned char xfs_mode_to_ftype[]; ++ + /* + * Direct call from the bmap code, bypassing the generic directory layer. + */ +diff --git a/include/xfs_format.h b/include/xfs_format.h +index 997c770..77f6b8b 100644 +--- a/include/xfs_format.h ++++ b/include/xfs_format.h +@@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr { + __be64 sl_lsn; + }; + ++#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc) ++ + /* + * The maximum pathlen is 1024 bytes. Since the minimum file system + * blocksize is 512 bytes, we can get a max of 3 extents back from +diff --git a/include/xfs_sb.h b/include/xfs_sb.h +index 35061d4..f7b2fe7 100644 +--- a/include/xfs_sb.h ++++ b/include/xfs_sb.h +@@ -182,6 +182,8 @@ typedef struct xfs_sb { + /* must be padded to 64 bit alignment */ + } xfs_sb_t; + ++#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc) ++ + /* + * Superblock - on disk version. Must match the in core version above. + * Must be padded to 64 bit alignment. +diff --git a/io/file.c b/io/file.c +index db85ffc..73b893f 100644 +--- a/io/file.c ++++ b/io/file.c +@@ -36,7 +36,7 @@ print_fileio( + int index, + int braces) + { +- printf(_("%c%03d%c %-14s (%s,%s,%s,%s%s%s%s)\n"), ++ printf(_("%c%03d%c %-14s (%s,%s,%s,%s%s%s%s%s)\n"), + braces? '[' : ' ', index, braces? ']' : ' ', file->name, + file->flags & IO_FOREIGN ? _("foreign") : _("xfs"), + file->flags & IO_OSYNC ? _("sync") : _("non-sync"), +@@ -44,7 +44,8 @@ print_fileio( + file->flags & IO_READONLY ? _("read-only") : _("read-write"), + file->flags & IO_REALTIME ? _(",real-time") : "", + file->flags & IO_APPEND ? _(",append-only") : "", +- file->flags & IO_NONBLOCK ? _(",non-block") : ""); ++ file->flags & IO_NONBLOCK ? _(",non-block") : "", ++ file->flags & IO_TMPFILE ? 
_(",tmpfile") : ""); + } + + int +diff --git a/io/imap.c b/io/imap.c +index 0a4f14e..4f3f883 100644 +--- a/io/imap.c ++++ b/io/imap.c +@@ -67,7 +67,7 @@ imap_init(void) + imap_cmd.name = "imap"; + imap_cmd.cfunc = imap_f; + imap_cmd.argmin = 0; +- imap_cmd.argmax = 0; ++ imap_cmd.argmax = 1; + imap_cmd.args = _("[nentries]"); + imap_cmd.flags = CMD_NOMAP_OK; + imap_cmd.oneline = _("inode map for filesystem of current file"); +diff --git a/io/init.c b/io/init.c +index ef9e4cb..1e2690e 100644 +--- a/io/init.c ++++ b/io/init.c +@@ -136,7 +136,7 @@ init( + pagesize = getpagesize(); + gettimeofday(&stopwatch, NULL); + +- while ((c = getopt(argc, argv, "ac:dFfmp:nrRstVx")) != EOF) { ++ while ((c = getopt(argc, argv, "ac:dFfmp:nrRstTVx")) != EOF) { + switch (c) { + case 'a': + flags |= IO_APPEND; +@@ -179,6 +179,9 @@ init( + case 'R': + flags |= IO_REALTIME; + break; ++ case 'T': ++ flags |= IO_TMPFILE; ++ break; + case 'x': + expert = 1; + break; +diff --git a/io/io.h b/io/io.h +index 6c3f627..0d2d768 100644 +--- a/io/io.h ++++ b/io/io.h +@@ -35,6 +35,7 @@ + #define IO_TRUNC (1<<6) + #define IO_FOREIGN (1<<7) + #define IO_NONBLOCK (1<<8) ++#define IO_TMPFILE (1<<9) + + /* + * Regular file I/O control +diff --git a/io/open.c b/io/open.c +index cc677e6..c106fa7 100644 +--- a/io/open.c ++++ b/io/open.c +@@ -22,6 +22,22 @@ + #include "init.h" + #include "io.h" + ++#ifndef __O_TMPFILE ++#if defined __alpha__ ++#define __O_TMPFILE 0100000000 ++#elif defined(__hppa__) ++#define __O_TMPFILE 040000000 ++#elif defined(__sparc__) ++#define __O_TMPFILE 0x2000000 ++#else ++#define __O_TMPFILE 020000000 ++#endif ++#endif /* __O_TMPFILE */ ++ ++#ifndef O_TMPFILE ++#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) ++#endif ++ + static cmdinfo_t open_cmd; + static cmdinfo_t stat_cmd; + static cmdinfo_t close_cmd; +@@ -77,13 +93,14 @@ stat_f( + int verbose = (argc == 2 && !strcmp(argv[1], "-v")); + + printf(_("fd.path = \"%s\"\n"), file->name); +- printf(_("fd.flags = %s,%s,%s%s%s%s\n"), ++ 
printf(_("fd.flags = %s,%s,%s%s%s%s%s\n"), + file->flags & IO_OSYNC ? _("sync") : _("non-sync"), + file->flags & IO_DIRECT ? _("direct") : _("non-direct"), + file->flags & IO_READONLY ? _("read-only") : _("read-write"), + file->flags & IO_REALTIME ? _(",real-time") : "", + file->flags & IO_APPEND ? _(",append-only") : "", +- file->flags & IO_NONBLOCK ? _(",non-block") : ""); ++ file->flags & IO_NONBLOCK ? _(",non-block") : "", ++ file->flags & IO_TMPFILE ? _(",tmpfile") : ""); + if (fstat64(file->fd, &st) < 0) { + perror("fstat64"); + } else { +@@ -143,10 +160,13 @@ openfile( + oflags |= O_TRUNC; + if (flags & IO_NONBLOCK) + oflags |= O_NONBLOCK; ++ if (flags & IO_TMPFILE) ++ oflags |= O_TMPFILE; + + fd = open(path, oflags, mode); + if (fd < 0) { +- if ((errno == EISDIR) && (oflags & O_RDWR)) { ++ if (errno == EISDIR && ++ ((oflags & (O_RDWR|O_TMPFILE)) == O_RDWR)) { + /* make it as if we asked for O_RDONLY & try again */ + oflags &= ~O_RDWR; + oflags |= O_RDONLY; +@@ -248,6 +268,7 @@ open_help(void) + " -s -- open with O_SYNC\n" + " -t -- open with O_TRUNC (truncate the file to zero length if it exists)\n" + " -R -- mark the file as a realtime XFS file immediately after opening it\n" ++" -T -- open with O_TMPFILE (create a file not visible in the namespace)\n" + " Note1: usually read/write direct IO requests must be blocksize aligned;\n" + " some kernels, however, allow sectorsize alignment for direct IO.\n" + " Note2: the bmap for non-regular files can be obtained provided the file\n" +@@ -272,7 +293,7 @@ open_f( + return 0; + } + +- while ((c = getopt(argc, argv, "FRacdfm:nrstx")) != EOF) { ++ while ((c = getopt(argc, argv, "FRTacdfm:nrstx")) != EOF) { + switch (c) { + case 'F': + /* Ignored / deprecated now, handled automatically */ +@@ -310,6 +331,9 @@ open_f( + case 'x': /* backwards compatibility */ + flags |= IO_REALTIME; + break; ++ case 'T': ++ flags |= IO_TMPFILE; ++ break; + default: + return command_usage(&open_cmd); + } +@@ -318,6 +342,11 @@ open_f( + 
if (optind != argc - 1) + return command_usage(&open_cmd); + ++ if ((flags & (IO_READONLY|IO_TMPFILE)) == (IO_READONLY|IO_TMPFILE)) { ++ fprintf(stderr, _("-T and -r options are incompatible\n")); ++ return -1; ++ } ++ + fd = openfile(argv[optind], &geometry, flags, mode); + if (fd < 0) + return 0; +@@ -731,7 +760,7 @@ open_init(void) + open_cmd.argmin = 0; + open_cmd.argmax = -1; + open_cmd.flags = CMD_NOMAP_OK | CMD_NOFILE_OK | CMD_FOREIGN_OK; +- open_cmd.args = _("[-acdrstx] [path]"); ++ open_cmd.args = _("[-acdrstxT] [path]"); + open_cmd.oneline = _("open the file specified by path"); + open_cmd.help = open_help; + +diff --git a/io/prealloc.c b/io/prealloc.c +index 8380646..aba6b44 100644 +--- a/io/prealloc.c ++++ b/io/prealloc.c +@@ -29,6 +29,14 @@ + #define FALLOC_FL_PUNCH_HOLE 0x02 + #endif + ++#ifndef FALLOC_FL_COLLAPSE_RANGE ++#define FALLOC_FL_COLLAPSE_RANGE 0x08 ++#endif ++ ++#ifndef FALLOC_FL_ZERO_RANGE ++#define FALLOC_FL_ZERO_RANGE 0x10 ++#endif ++ + static cmdinfo_t allocsp_cmd; + static cmdinfo_t freesp_cmd; + static cmdinfo_t resvsp_cmd; +@@ -37,6 +45,8 @@ static cmdinfo_t zero_cmd; + #if defined(HAVE_FALLOCATE) + static cmdinfo_t falloc_cmd; + static cmdinfo_t fpunch_cmd; ++static cmdinfo_t fcollapse_cmd; ++static cmdinfo_t fzero_cmd; + #endif + + static int +@@ -159,8 +169,11 @@ fallocate_f( + int mode = 0; + int c; + +- while ((c = getopt(argc, argv, "kp")) != EOF) { ++ while ((c = getopt(argc, argv, "ckp")) != EOF) { + switch (c) { ++ case 'c': ++ mode = FALLOC_FL_COLLAPSE_RANGE; ++ break; + case 'k': + mode = FALLOC_FL_KEEP_SIZE; + break; +@@ -203,6 +216,50 @@ fpunch_f( + } + return 0; + } ++ ++static int ++fcollapse_f( ++ int argc, ++ char **argv) ++{ ++ xfs_flock64_t segment; ++ int mode = FALLOC_FL_COLLAPSE_RANGE; ++ ++ if (!offset_length(argv[1], argv[2], &segment)) ++ return 0; ++ ++ if (fallocate(file->fd, mode, ++ segment.l_start, segment.l_len)) { ++ perror("fallocate"); ++ return 0; ++ } ++ return 0; ++} ++ ++static int ++fzero_f( ++ 
int argc, ++ char **argv) ++{ ++ xfs_flock64_t segment; ++ int mode = FALLOC_FL_ZERO_RANGE; ++ int index = 1; ++ ++ if (strncmp(argv[index], "-k", 3) == 0) { ++ mode |= FALLOC_FL_KEEP_SIZE; ++ index++; ++ } ++ ++ if (!offset_length(argv[index], argv[index + 1], &segment)) ++ return 0; ++ ++ if (fallocate(file->fd, mode, ++ segment.l_start, segment.l_len)) { ++ perror("fallocate"); ++ return 0; ++ } ++ return 0; ++} + #endif /* HAVE_FALLOCATE */ + + void +@@ -263,9 +320,9 @@ prealloc_init(void) + falloc_cmd.argmin = 2; + falloc_cmd.argmax = -1; + falloc_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; +- falloc_cmd.args = _("[-k] [-p] off len"); ++ falloc_cmd.args = _("[-c] [-k] [-p] off len"); + falloc_cmd.oneline = +- _("allocates space associated with part of a file via fallocate"); ++ _("allocates space associated with part of a file via fallocate"); + add_command(&falloc_cmd); + + fpunch_cmd.name = "fpunch"; +@@ -275,7 +332,27 @@ prealloc_init(void) + fpunch_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; + fpunch_cmd.args = _("off len"); + fpunch_cmd.oneline = +- _("de-allocates space assocated with part of a file via fallocate"); ++ _("de-allocates space assocated with part of a file via fallocate"); + add_command(&fpunch_cmd); ++ ++ fcollapse_cmd.name = "fcollapse"; ++ fcollapse_cmd.cfunc = fcollapse_f; ++ fcollapse_cmd.argmin = 2; ++ fcollapse_cmd.argmax = 2; ++ fcollapse_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; ++ fcollapse_cmd.args = _("off len"); ++ fcollapse_cmd.oneline = ++ _("de-allocates space and eliminates the hole by shifting extents"); ++ add_command(&fcollapse_cmd); ++ ++ fzero_cmd.name = "fzero"; ++ fzero_cmd.cfunc = fzero_f; ++ fzero_cmd.argmin = 2; ++ fzero_cmd.argmax = 3; ++ fzero_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; ++ fzero_cmd.args = _("[-k] off len"); ++ fzero_cmd.oneline = ++ _("zeroes space and eliminates holes by preallocating"); ++ add_command(&fzero_cmd); + #endif /* HAVE_FALLOCATE */ + } +diff --git a/libxfs/cache.c b/libxfs/cache.c 
+index 84d2860..dc69689 100644 +--- a/libxfs/cache.c ++++ b/libxfs/cache.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + + #define CACHE_DEBUG 1 + #undef CACHE_DEBUG +@@ -61,6 +62,7 @@ cache_init( + cache->c_misses = 0; + cache->c_maxcount = maxcount; + cache->c_hashsize = hashsize; ++ cache->c_hashshift = libxfs_highbit32(hashsize); + cache->hash = cache_operations->hash; + cache->alloc = cache_operations->alloc; + cache->flush = cache_operations->flush; +@@ -343,7 +345,7 @@ cache_node_get( + int priority = 0; + int purged = 0; + +- hashidx = cache->hash(key, cache->c_hashsize); ++ hashidx = cache->hash(key, cache->c_hashsize, cache->c_hashshift); + hash = cache->c_hash + hashidx; + head = &hash->ch_list; + +@@ -515,7 +517,8 @@ cache_node_purge( + struct cache_hash * hash; + int count = -1; + +- hash = cache->c_hash + cache->hash(key, cache->c_hashsize); ++ hash = cache->c_hash + cache->hash(key, cache->c_hashsize, ++ cache->c_hashshift); + head = &hash->ch_list; + pthread_mutex_lock(&hash->ch_mutex); + for (pos = head->next, n = pos->next; pos != head; +diff --git a/libxfs/init.h b/libxfs/init.h +index f0b8cb6..112febb 100644 +--- a/libxfs/init.h ++++ b/libxfs/init.h +@@ -31,7 +31,6 @@ extern char *platform_findrawpath (char *path); + extern char *platform_findblockpath (char *path); + extern int platform_direct_blockdev (void); + extern int platform_align_blockdev (void); +-extern int platform_nproc(void); + extern unsigned long platform_physmem(void); /* in kilobytes */ + extern int platform_has_uuid; + +diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c +index 0219a08..1b691fb 100644 +--- a/libxfs/rdwr.c ++++ b/libxfs/rdwr.c +@@ -203,7 +203,8 @@ xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, + int, int, const struct xfs_buf_ops *); + int libxfs_writebuf(xfs_buf_t *, int); + xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int); +-xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, int); 
++xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, ++ int, int); + xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int, + unsigned int); + void libxfs_putbuf (xfs_buf_t *); +@@ -255,9 +256,10 @@ libxfs_trace_getbuf(const char *func, const char *file, int line, + + xfs_buf_t * + libxfs_trace_getbuf_map(const char *func, const char *file, int line, +- struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps) ++ struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, ++ int flags) + { +- xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps); ++ xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps, flags); + __add_trace(bp, func, file, line); + return bp; + } +@@ -311,10 +313,18 @@ struct xfs_bufkey { + int nmaps; + }; + ++/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ ++#define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL ++#define CACHE_LINE_SIZE 64 + static unsigned int +-libxfs_bhash(cache_key_t key, unsigned int hashsize) ++libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift) + { +- return (((unsigned int)((struct xfs_bufkey *)key)->blkno) >> 5) % hashsize; ++ uint64_t hashval = ((struct xfs_bufkey *)key)->blkno; ++ uint64_t tmp; ++ ++ tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE; ++ tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift); ++ return tmp % hashsize; + } + + static int +@@ -582,11 +592,16 @@ libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len) + } + + struct xfs_buf * +-libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps) ++libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, ++ int nmaps, int flags) + { + struct xfs_bufkey key = {0}; + int i; + ++ if (nmaps == 1) ++ return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len, ++ flags); ++ + key.buftarg = btp; + key.blkno = map[0].bm_bn; + for (i = 0; i < nmaps; i++) { +@@ -595,7 +610,7 @@ libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int 
nmaps) + key.map = map; + key.nmaps = nmaps; + +- return __cache_lookup(&key, 0); ++ return __cache_lookup(&key, flags); + } + + void +@@ -724,27 +739,19 @@ libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags, + } + + int +-libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, +- struct xfs_buf_map *map, int nmaps, int flags) ++libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags) + { + int fd = libxfs_device_to_fd(btp->dev); + int error = 0; + char *buf; + int i; + +- ASSERT(BBTOB(len) <= bp->b_bcount); +- +- ASSERT(bp->b_nmaps == nmaps); +- + fd = libxfs_device_to_fd(btp->dev); + buf = bp->b_addr; + for (i = 0; i < bp->b_nmaps; i++) { + off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn); + int len = BBTOB(bp->b_map[i].bm_len); + +- ASSERT(bp->b_map[i].bm_bn == map[i].bm_bn); +- ASSERT(bp->b_map[i].bm_len == map[i].bm_len); +- + error = __read_buf(fd, buf, len, offset, flags); + if (error) { + bp->b_error = error; +@@ -775,7 +782,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, + return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len, + flags, ops); + +- bp = libxfs_getbuf_map(btp, map, nmaps); ++ bp = libxfs_getbuf_map(btp, map, nmaps, 0); + if (!bp) + return NULL; + +@@ -784,7 +791,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, + if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) + return bp; + +- error = libxfs_readbufr_map(btp, bp, map, nmaps, flags); ++ error = libxfs_readbufr_map(btp, bp, flags); + if (!error) { + bp->b_flags |= LIBXFS_B_UPTODATE; + if (bp->b_ops) +@@ -891,6 +898,11 @@ libxfs_writebufr(xfs_buf_t *bp) + int + libxfs_writebuf_int(xfs_buf_t *bp, int flags) + { ++ /* ++ * Clear any error hanging over from reading the buffer. This prevents ++ * subsequent reads after this write from seeing stale errors. 
++ */ ++ bp->b_error = 0; + bp->b_flags |= (LIBXFS_B_DIRTY | flags); + return 0; + } +@@ -904,6 +916,11 @@ libxfs_writebuf(xfs_buf_t *bp, int flags) + (long long)LIBXFS_BBTOOFF64(bp->b_bn), + (long long)bp->b_bn); + #endif ++ /* ++ * Clear any error hanging over from reading the buffer. This prevents ++ * subsequent reads after this write from seeing stale errors. ++ */ ++ bp->b_error = 0; + bp->b_flags |= (LIBXFS_B_DIRTY | flags); + libxfs_putbuf(bp); + return 0; +diff --git a/libxfs/trans.c b/libxfs/trans.c +index 6a05673..c443863 100644 +--- a/libxfs/trans.c ++++ b/libxfs/trans.c +@@ -511,7 +511,7 @@ libxfs_trans_get_buf_map( + xfs_buf_log_item_t *bip; + + if (tp == NULL) +- return libxfs_getbuf_map(btp, map, nmaps); ++ return libxfs_getbuf_map(btp, map, nmaps, 0); + + bp = xfs_trans_buf_item_match(tp, btp, map, nmaps); + if (bp != NULL) { +@@ -522,7 +522,7 @@ libxfs_trans_get_buf_map( + return bp; + } + +- bp = libxfs_getbuf_map(btp, map, nmaps); ++ bp = libxfs_getbuf_map(btp, map, nmaps, 0); + if (bp == NULL) + return NULL; + #ifdef XACT_DEBUG +@@ -694,7 +694,6 @@ inode_item_done( + xfs_mount_t *mp; + xfs_buf_t *bp; + int error; +- extern kmem_zone_t *xfs_ili_zone; + + ip = iip->ili_inode; + mp = iip->ili_item.li_mountp; +@@ -736,15 +735,9 @@ ili_done: + if (iip->ili_lock_flags) { + iip->ili_lock_flags = 0; + return; +- } else { +- libxfs_iput(ip, 0); + } +- +- if (ip->i_itemp) +- kmem_zone_free(xfs_ili_zone, ip->i_itemp); +- else +- ASSERT(0); +- ip->i_itemp = NULL; ++ /* free the inode */ ++ libxfs_iput(ip, 0); + } + + static void +diff --git a/libxfs/util.c b/libxfs/util.c +index 8109ab3..1b05540 100644 +--- a/libxfs/util.c ++++ b/libxfs/util.c +@@ -730,3 +730,16 @@ cmn_err(int level, char *fmt, ...) + fputs("\n", stderr); + va_end(ap); + } ++ ++/* ++ * Warnings specifically for verifier errors. Differentiate CRC vs. invalid ++ * values, and omit the stack trace unless the error level is tuned high. 
++ */ ++void ++xfs_verifier_error( ++ struct xfs_buf *bp) ++{ ++ xfs_alert(NULL, "Metadata %s detected at block 0x%llx/0x%x", ++ bp->b_error == EFSBADCRC ? "CRC error" : "corruption", ++ bp->b_bn, BBTOB(bp->b_length)); ++} +diff --git a/libxfs/xfs.h b/libxfs/xfs.h +index 364fd83..5a21590 100644 +--- a/libxfs/xfs.h ++++ b/libxfs/xfs.h +@@ -449,3 +449,4 @@ int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); + void xfs_trans_mod_sb(xfs_trans_t *, uint, long); + void xfs_trans_init(struct xfs_mount *); + int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); ++void xfs_verifier_error(struct xfs_buf *bp); +diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c +index e4fb1ad..6c82be0 100644 +--- a/libxfs/xfs_alloc.c ++++ b/libxfs/xfs_alloc.c +@@ -452,7 +452,6 @@ xfs_agfl_read_verify( + struct xfs_buf *bp) + { + struct xfs_mount *mp = bp->b_target->bt_mount; +- int agfl_ok = 1; + + /* + * There is no verification of non-crc AGFLs because mkfs does not +@@ -463,15 +462,13 @@ xfs_agfl_read_verify( + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + +- agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- offsetof(struct xfs_agfl, agfl_crc)); +- +- agfl_ok = agfl_ok && xfs_agfl_verify(bp); +- +- if (!agfl_ok) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); ++ if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_agfl_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); +- } ++ ++ if (bp->b_error) ++ xfs_verifier_error(bp); + } + + static void +@@ -486,16 +483,15 @@ xfs_agfl_write_verify( + return; + + if (!xfs_agfl_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + + if (bip) + XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); + +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), +- offsetof(struct xfs_agfl, agfl_crc)); ++ xfs_buf_update_cksum(bp, 
XFS_AGFL_CRC_OFF); + } + + const struct xfs_buf_ops xfs_agfl_buf_ops = { +@@ -2218,19 +2214,17 @@ xfs_agf_read_verify( + struct xfs_buf *bp) + { + struct xfs_mount *mp = bp->b_target->bt_mount; +- int agf_ok = 1; +- +- if (xfs_sb_version_hascrc(&mp->m_sb)) +- agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- offsetof(struct xfs_agf, agf_crc)); + +- agf_ok = agf_ok && xfs_agf_verify(mp, bp); +- +- if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, +- XFS_RANDOM_ALLOC_READ_AGF))) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); ++ if (xfs_sb_version_hascrc(&mp->m_sb) && ++ !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, ++ XFS_ERRTAG_ALLOC_READ_AGF, ++ XFS_RANDOM_ALLOC_READ_AGF)) + xfs_buf_ioerror(bp, EFSCORRUPTED); +- } ++ ++ if (bp->b_error) ++ xfs_verifier_error(bp); + } + + static void +@@ -2241,8 +2235,8 @@ xfs_agf_write_verify( + struct xfs_buf_log_item *bip = bp->b_fspriv; + + if (!xfs_agf_verify(mp, bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + +@@ -2252,8 +2246,7 @@ xfs_agf_write_verify( + if (bip) + XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); + +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), +- offsetof(struct xfs_agf, agf_crc)); ++ xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); + } + + const struct xfs_buf_ops xfs_agf_buf_ops = { +diff --git a/libxfs/xfs_alloc_btree.c b/libxfs/xfs_alloc_btree.c +index 282a320..215be7e 100644 +--- a/libxfs/xfs_alloc_btree.c ++++ b/libxfs/xfs_alloc_btree.c +@@ -337,12 +337,14 @@ static void + xfs_allocbt_read_verify( + struct xfs_buf *bp) + { +- if (!(xfs_btree_sblock_verify_crc(bp) && +- xfs_allocbt_verify(bp))) { +- trace_xfs_btree_corrupt(bp, _RET_IP_); +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +- bp->b_target->bt_mount, bp->b_addr); ++ if 
(!xfs_btree_sblock_verify_crc(bp)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_allocbt_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ ++ if (bp->b_error) { ++ trace_xfs_btree_corrupt(bp, _RET_IP_); ++ xfs_verifier_error(bp); + } + } + +@@ -352,9 +354,9 @@ xfs_allocbt_write_verify( + { + if (!xfs_allocbt_verify(bp)) { + trace_xfs_btree_corrupt(bp, _RET_IP_); +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +- bp->b_target->bt_mount, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); ++ return; + } + xfs_btree_sblock_calc_crc(bp); + +diff --git a/libxfs/xfs_attr_leaf.c b/libxfs/xfs_attr_leaf.c +index fd52397..f7f02ae 100644 +--- a/libxfs/xfs_attr_leaf.c ++++ b/libxfs/xfs_attr_leaf.c +@@ -187,8 +187,8 @@ xfs_attr3_leaf_write_verify( + struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; + + if (!xfs_attr3_leaf_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + +@@ -198,7 +198,7 @@ xfs_attr3_leaf_write_verify( + if (bip) + hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); + +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF); ++ xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF); + } + + /* +@@ -213,13 +213,14 @@ xfs_attr3_leaf_read_verify( + { + struct xfs_mount *mp = bp->b_target->bt_mount; + +- if ((xfs_sb_version_hascrc(&mp->m_sb) && +- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- XFS_ATTR3_LEAF_CRC_OFF)) || +- !xfs_attr3_leaf_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); ++ if (xfs_sb_version_hascrc(&mp->m_sb) && ++ !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_attr3_leaf_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); +- } ++ ++ if (bp->b_error) ++ xfs_verifier_error(bp); + } + + const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { +diff --git a/libxfs/xfs_attr_remote.c 
b/libxfs/xfs_attr_remote.c +index 59bb12d..5cf5c73 100644 +--- a/libxfs/xfs_attr_remote.c ++++ b/libxfs/xfs_attr_remote.c +@@ -100,7 +100,6 @@ xfs_attr3_rmt_read_verify( + struct xfs_mount *mp = bp->b_target->bt_mount; + char *ptr; + int len; +- bool corrupt = false; + xfs_daddr_t bno; + + /* no verification of non-crc buffers */ +@@ -115,11 +114,11 @@ xfs_attr3_rmt_read_verify( + while (len > 0) { + if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), + XFS_ATTR3_RMT_CRC_OFF)) { +- corrupt = true; ++ xfs_buf_ioerror(bp, EFSBADCRC); + break; + } + if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { +- corrupt = true; ++ xfs_buf_ioerror(bp, EFSCORRUPTED); + break; + } + len -= XFS_LBSIZE(mp); +@@ -127,10 +126,9 @@ xfs_attr3_rmt_read_verify( + bno += mp->m_bsize; + } + +- if (corrupt) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +- xfs_buf_ioerror(bp, EFSCORRUPTED); +- } else ++ if (bp->b_error) ++ xfs_verifier_error(bp); ++ else + ASSERT(len == 0); + } + +@@ -155,9 +153,8 @@ xfs_attr3_rmt_write_verify( + + while (len > 0) { + if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { +- XFS_CORRUPTION_ERROR(__func__, +- XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + if (bip) { +diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c +index 3136e4f..a4bd69d 100644 +--- a/libxfs/xfs_bmap_btree.c ++++ b/libxfs/xfs_bmap_btree.c +@@ -759,12 +759,14 @@ static void + xfs_bmbt_read_verify( + struct xfs_buf *bp) + { +- if (!(xfs_btree_lblock_verify_crc(bp) && +- xfs_bmbt_verify(bp))) { +- trace_xfs_btree_corrupt(bp, _RET_IP_); +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +- bp->b_target->bt_mount, bp->b_addr); ++ if (!xfs_btree_lblock_verify_crc(bp)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_bmbt_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ ++ if (bp->b_error) { ++ trace_xfs_btree_corrupt(bp, _RET_IP_); ++ xfs_verifier_error(bp); + } + } + +@@ -773,11 
+775,9 @@ xfs_bmbt_write_verify( + struct xfs_buf *bp) + { + if (!xfs_bmbt_verify(bp)) { +- xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn); + trace_xfs_btree_corrupt(bp, _RET_IP_); +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +- bp->b_target->bt_mount, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + xfs_btree_lblock_calc_crc(bp); +diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c +index 2dd6fb7..9be4abd 100644 +--- a/libxfs/xfs_btree.c ++++ b/libxfs/xfs_btree.c +@@ -218,8 +218,7 @@ xfs_btree_lblock_calc_crc( + return; + if (bip) + block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), +- XFS_BTREE_LBLOCK_CRC_OFF); ++ xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); + } + + bool +@@ -227,8 +226,8 @@ xfs_btree_lblock_verify_crc( + struct xfs_buf *bp) + { + if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) +- return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- XFS_BTREE_LBLOCK_CRC_OFF); ++ return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); ++ + return true; + } + +@@ -251,8 +250,7 @@ xfs_btree_sblock_calc_crc( + return; + if (bip) + block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), +- XFS_BTREE_SBLOCK_CRC_OFF); ++ xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); + } + + bool +@@ -260,8 +258,8 @@ xfs_btree_sblock_verify_crc( + struct xfs_buf *bp) + { + if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) +- return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- XFS_BTREE_SBLOCK_CRC_OFF); ++ return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); ++ + return true; + } + +diff --git a/libxfs/xfs_da_btree.c b/libxfs/xfs_da_btree.c +index 53414f5..154adb1 100644 +--- a/libxfs/xfs_da_btree.c ++++ b/libxfs/xfs_da_btree.c +@@ -209,8 +209,8 @@ xfs_da3_node_write_verify( + struct xfs_da3_node_hdr *hdr3 = bp->b_addr; + + if 
(!xfs_da3_node_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + +@@ -220,7 +220,7 @@ xfs_da3_node_write_verify( + if (bip) + hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); + +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF); ++ xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); + } + + /* +@@ -233,18 +233,20 @@ static void + xfs_da3_node_read_verify( + struct xfs_buf *bp) + { +- struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_da_blkinfo *info = bp->b_addr; + + switch (be16_to_cpu(info->magic)) { + case XFS_DA3_NODE_MAGIC: +- if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- XFS_DA3_NODE_CRC_OFF)) ++ if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { ++ xfs_buf_ioerror(bp, EFSBADCRC); + break; ++ } + /* fall through */ + case XFS_DA_NODE_MAGIC: +- if (!xfs_da3_node_verify(bp)) ++ if (!xfs_da3_node_verify(bp)) { ++ xfs_buf_ioerror(bp, EFSCORRUPTED); + break; ++ } + return; + case XFS_ATTR_LEAF_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: +@@ -261,8 +263,7 @@ xfs_da3_node_read_verify( + } + + /* corrupt block */ +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +- xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + } + + const struct xfs_buf_ops xfs_da3_node_buf_ops = { +diff --git a/libxfs/xfs_dir2.c b/libxfs/xfs_dir2.c +index 96a3c1d..4c8c836 100644 +--- a/libxfs/xfs_dir2.c ++++ b/libxfs/xfs_dir2.c +@@ -20,6 +20,22 @@ + + struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; + ++/* ++ * @mode, if set, indicates that the type field needs to be set up. ++ * This uses the transformation from file mode to DT_* as defined in linux/fs.h ++ * for file type specification. This will be propagated into the directory ++ * structure if appropriate for the given operation and filesystem config. 
++ */ ++const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { ++ [0] = XFS_DIR3_FT_UNKNOWN, ++ [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, ++ [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, ++ [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, ++ [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, ++ [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, ++ [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, ++ [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, ++}; + + /* + * ASCII case-insensitive (ie. A-Z) support for directories that was +diff --git a/libxfs/xfs_dir2_block.c b/libxfs/xfs_dir2_block.c +index 1d8f598..cede01f 100644 +--- a/libxfs/xfs_dir2_block.c ++++ b/libxfs/xfs_dir2_block.c +@@ -70,13 +70,14 @@ xfs_dir3_block_read_verify( + { + struct xfs_mount *mp = bp->b_target->bt_mount; + +- if ((xfs_sb_version_hascrc(&mp->m_sb) && +- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- XFS_DIR3_DATA_CRC_OFF)) || +- !xfs_dir3_block_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); ++ if (xfs_sb_version_hascrc(&mp->m_sb) && ++ !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_dir3_block_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); +- } ++ ++ if (bp->b_error) ++ xfs_verifier_error(bp); + } + + static void +@@ -88,8 +89,8 @@ xfs_dir3_block_write_verify( + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (!xfs_dir3_block_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + +@@ -99,7 +100,7 @@ xfs_dir3_block_write_verify( + if (bip) + hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); + +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); ++ xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); + } + + const struct xfs_buf_ops xfs_dir3_block_buf_ops = { +diff --git a/libxfs/xfs_dir2_data.c b/libxfs/xfs_dir2_data.c +index 189699f..dc9df4d 100644 +--- a/libxfs/xfs_dir2_data.c ++++ 
b/libxfs/xfs_dir2_data.c +@@ -208,7 +208,6 @@ static void + xfs_dir3_data_reada_verify( + struct xfs_buf *bp) + { +- struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_dir2_data_hdr *hdr = bp->b_addr; + + switch (hdr->magic) { +@@ -222,8 +221,8 @@ xfs_dir3_data_reada_verify( + xfs_dir3_data_verify(bp); + return; + default: +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + break; + } + } +@@ -234,13 +233,14 @@ xfs_dir3_data_read_verify( + { + struct xfs_mount *mp = bp->b_target->bt_mount; + +- if ((xfs_sb_version_hascrc(&mp->m_sb) && +- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- XFS_DIR3_DATA_CRC_OFF)) || +- !xfs_dir3_data_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); ++ if (xfs_sb_version_hascrc(&mp->m_sb) && ++ !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_dir3_data_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); +- } ++ ++ if (bp->b_error) ++ xfs_verifier_error(bp); + } + + static void +@@ -252,8 +252,8 @@ xfs_dir3_data_write_verify( + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (!xfs_dir3_data_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + +@@ -263,7 +263,7 @@ xfs_dir3_data_write_verify( + if (bip) + hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); + +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); ++ xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); + } + + const struct xfs_buf_ops xfs_dir3_data_buf_ops = { +diff --git a/libxfs/xfs_dir2_leaf.c b/libxfs/xfs_dir2_leaf.c +index 683536e..8e0cbc9 100644 +--- a/libxfs/xfs_dir2_leaf.c ++++ b/libxfs/xfs_dir2_leaf.c +@@ -206,13 +206,14 @@ __read_verify( + { + struct xfs_mount *mp = bp->b_target->bt_mount; + +- if ((xfs_sb_version_hascrc(&mp->m_sb) && +- !xfs_verify_cksum(bp->b_addr, 
BBTOB(bp->b_length), +- XFS_DIR3_LEAF_CRC_OFF)) || +- !xfs_dir3_leaf_verify(bp, magic)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); ++ if (xfs_sb_version_hascrc(&mp->m_sb) && ++ !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_dir3_leaf_verify(bp, magic)) + xfs_buf_ioerror(bp, EFSCORRUPTED); +- } ++ ++ if (bp->b_error) ++ xfs_verifier_error(bp); + } + + static void +@@ -225,8 +226,8 @@ __write_verify( + struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; + + if (!xfs_dir3_leaf_verify(bp, magic)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + +@@ -236,7 +237,7 @@ __write_verify( + if (bip) + hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); + +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF); ++ xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); + } + + static void +diff --git a/libxfs/xfs_dir2_node.c b/libxfs/xfs_dir2_node.c +index ced8c58..3256756 100644 +--- a/libxfs/xfs_dir2_node.c ++++ b/libxfs/xfs_dir2_node.c +@@ -98,13 +98,14 @@ xfs_dir3_free_read_verify( + { + struct xfs_mount *mp = bp->b_target->bt_mount; + +- if ((xfs_sb_version_hascrc(&mp->m_sb) && +- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- XFS_DIR3_FREE_CRC_OFF)) || +- !xfs_dir3_free_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); ++ if (xfs_sb_version_hascrc(&mp->m_sb) && ++ !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_dir3_free_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); +- } ++ ++ if (bp->b_error) ++ xfs_verifier_error(bp); + } + + static void +@@ -116,8 +117,8 @@ xfs_dir3_free_write_verify( + struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; + + if (!xfs_dir3_free_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ 
xfs_verifier_error(bp); + return; + } + +@@ -127,7 +128,7 @@ xfs_dir3_free_write_verify( + if (bip) + hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); + +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF); ++ xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF); + } + + const struct xfs_buf_ops xfs_dir3_free_buf_ops = { +diff --git a/libxfs/xfs_dquot_buf.c b/libxfs/xfs_dquot_buf.c +index 6bbb0ff..e089ec8 100644 +--- a/libxfs/xfs_dquot_buf.c ++++ b/libxfs/xfs_dquot_buf.c +@@ -237,10 +237,13 @@ xfs_dquot_buf_read_verify( + { + struct xfs_mount *mp = bp->b_target->bt_mount; + +- if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); ++ if (!xfs_dquot_buf_verify_crc(mp, bp)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_dquot_buf_verify(mp, bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); +- } ++ ++ if (bp->b_error) ++ xfs_verifier_error(bp); + } + + /* +@@ -255,8 +258,8 @@ xfs_dquot_buf_write_verify( + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (!xfs_dquot_buf_verify(mp, bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + } +diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c +index afe1a82..c19d84a 100644 +--- a/libxfs/xfs_ialloc.c ++++ b/libxfs/xfs_ialloc.c +@@ -1551,18 +1551,17 @@ xfs_agi_read_verify( + struct xfs_buf *bp) + { + struct xfs_mount *mp = bp->b_target->bt_mount; +- int agi_ok = 1; + +- if (xfs_sb_version_hascrc(&mp->m_sb)) +- agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- offsetof(struct xfs_agi, agi_crc)); +- agi_ok = agi_ok && xfs_agi_verify(bp); +- +- if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, +- XFS_RANDOM_IALLOC_READ_AGI))) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); ++ if (xfs_sb_version_hascrc(&mp->m_sb) && ++ !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) ++ 
xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, ++ XFS_ERRTAG_IALLOC_READ_AGI, ++ XFS_RANDOM_IALLOC_READ_AGI)) + xfs_buf_ioerror(bp, EFSCORRUPTED); +- } ++ ++ if (bp->b_error) ++ xfs_verifier_error(bp); + } + + static void +@@ -1573,8 +1572,8 @@ xfs_agi_write_verify( + struct xfs_buf_log_item *bip = bp->b_fspriv; + + if (!xfs_agi_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + +@@ -1583,8 +1582,7 @@ xfs_agi_write_verify( + + if (bip) + XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), +- offsetof(struct xfs_agi, agi_crc)); ++ xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); + } + + const struct xfs_buf_ops xfs_agi_buf_ops = { +diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c +index 27a5dd9..0a29d73 100644 +--- a/libxfs/xfs_ialloc_btree.c ++++ b/libxfs/xfs_ialloc_btree.c +@@ -224,12 +224,14 @@ static void + xfs_inobt_read_verify( + struct xfs_buf *bp) + { +- if (!(xfs_btree_sblock_verify_crc(bp) && +- xfs_inobt_verify(bp))) { +- trace_xfs_btree_corrupt(bp, _RET_IP_); +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +- bp->b_target->bt_mount, bp->b_addr); ++ if (!xfs_btree_sblock_verify_crc(bp)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_inobt_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ ++ if (bp->b_error) { ++ trace_xfs_btree_corrupt(bp, _RET_IP_); ++ xfs_verifier_error(bp); + } + } + +@@ -239,9 +241,9 @@ xfs_inobt_write_verify( + { + if (!xfs_inobt_verify(bp)) { + trace_xfs_btree_corrupt(bp, _RET_IP_); +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +- bp->b_target->bt_mount, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); ++ return; + } + xfs_btree_sblock_calc_crc(bp); + +diff --git a/libxfs/xfs_inode_buf.c b/libxfs/xfs_inode_buf.c +index d245d72..de16ed9 100644 +--- 
a/libxfs/xfs_inode_buf.c ++++ b/libxfs/xfs_inode_buf.c +@@ -88,8 +88,7 @@ xfs_inode_buf_verify( + } + + xfs_buf_ioerror(bp, EFSCORRUPTED); +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, +- mp, dip); ++ xfs_verifier_error(bp); + #ifdef DEBUG + xfs_alert(mp, + "bad inode magic/vsn daddr %lld #%d (magic=%x)", +@@ -292,7 +291,7 @@ xfs_dinode_verify( + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return false; + if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, +- offsetof(struct xfs_dinode, di_crc))) ++ XFS_DINODE_CRC_OFF)) + return false; + if (be64_to_cpu(dip->di_ino) != ino) + return false; +@@ -313,7 +312,7 @@ xfs_dinode_calc_crc( + + ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); + crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, +- offsetof(struct xfs_dinode, di_crc)); ++ XFS_DINODE_CRC_OFF); + dip->di_crc = xfs_end_cksum(crc); + } + +diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c +index 48b1a97..7ee4612 100644 +--- a/libxfs/xfs_sb.c ++++ b/libxfs/xfs_sb.c +@@ -258,6 +258,7 @@ xfs_mount_validate_sb( + sbp->sb_inodelog < XFS_DINODE_MIN_LOG || + sbp->sb_inodelog > XFS_DINODE_MAX_LOG || + sbp->sb_inodesize != (1 << sbp->sb_inodelog) || ++ sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || + (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || + (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || + (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || +@@ -265,8 +266,7 @@ xfs_mount_validate_sb( + sbp->sb_dblocks == 0 || + sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || + sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { +- XFS_CORRUPTION_ERROR("SB sanity check failed", +- XFS_ERRLEVEL_LOW, mp, sbp); ++ xfs_notice(mp, "SB sanity check failed"); + return XFS_ERROR(EFSCORRUPTED); + } + +@@ -542,6 +542,11 @@ xfs_sb_verify( + * single bit error could clear the feature bit and unused parts of the + * superblock are supposed to be zero. 
Hence a non-null crc field indicates that + * we've potentially lost a feature bit and we should check it anyway. ++ * ++ * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the ++ * last field in V4 secondary superblocks. So for secondary superblocks, ++ * we are more forgiving, and ignore CRC failures if the primary doesn't ++ * indicate that the fs version is V5. + */ + static void + xfs_sb_read_verify( +@@ -560,20 +565,22 @@ xfs_sb_read_verify( + XFS_SB_VERSION_5) || + dsb->sb_crc != 0)) { + +- if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize), +- offsetof(struct xfs_sb, sb_crc))) { +- error = EFSCORRUPTED; +- goto out_error; ++ if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { ++ /* Only fail bad secondaries on a known V5 filesystem */ ++ if (bp->b_bn == XFS_SB_DADDR || ++ xfs_sb_version_hascrc(&mp->m_sb)) { ++ error = EFSBADCRC; ++ goto out_error; ++ } + } + } + error = xfs_sb_verify(bp, true); + + out_error: + if (error) { +- if (error != EWRONGFS) +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +- mp, bp->b_addr); + xfs_buf_ioerror(bp, error); ++ if (error == EFSCORRUPTED || error == EFSBADCRC) ++ xfs_verifier_error(bp); + } + } + +@@ -589,7 +596,6 @@ xfs_sb_quiet_read_verify( + { + struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); + +- + if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { + /* XFS filesystem, verify noisily! 
*/ + xfs_sb_read_verify(bp); +@@ -609,9 +615,8 @@ xfs_sb_write_verify( + + error = xfs_sb_verify(bp, false); + if (error) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, +- mp, bp->b_addr); + xfs_buf_ioerror(bp, error); ++ xfs_verifier_error(bp); + return; + } + +@@ -621,8 +626,7 @@ xfs_sb_write_verify( + if (bip) + XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); + +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), +- offsetof(struct xfs_sb, sb_crc)); ++ xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); + } + + const struct xfs_buf_ops xfs_sb_buf_ops = { +diff --git a/libxfs/xfs_symlink_remote.c b/libxfs/xfs_symlink_remote.c +index 539db0c..ebf60ac 100644 +--- a/libxfs/xfs_symlink_remote.c ++++ b/libxfs/xfs_symlink_remote.c +@@ -116,12 +116,13 @@ xfs_symlink_read_verify( + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + +- if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +- offsetof(struct xfs_dsymlink_hdr, sl_crc)) || +- !xfs_symlink_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); ++ if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) ++ xfs_buf_ioerror(bp, EFSBADCRC); ++ else if (!xfs_symlink_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); +- } ++ ++ if (bp->b_error) ++ xfs_verifier_error(bp); + } + + static void +@@ -136,8 +137,8 @@ xfs_symlink_write_verify( + return; + + if (!xfs_symlink_verify(bp)) { +- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); + xfs_buf_ioerror(bp, EFSCORRUPTED); ++ xfs_verifier_error(bp); + return; + } + +@@ -145,8 +146,7 @@ xfs_symlink_write_verify( + struct xfs_dsymlink_hdr *dsl = bp->b_addr; + dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); + } +- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), +- offsetof(struct xfs_dsymlink_hdr, sl_crc)); ++ xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF); + } + + const struct xfs_buf_ops xfs_symlink_buf_ops = { +diff --git a/logprint/log_misc.c b/logprint/log_misc.c +index cf9d59d..52f1e85 100644 +--- 
a/logprint/log_misc.c ++++ b/logprint/log_misc.c +@@ -810,7 +810,6 @@ xlog_print_trans_icreate( + + memmove(&icl_buf, *ptr, MIN(sizeof(struct xfs_icreate_log), len)); + icl = &icl_buf; +- (*i)++; + *ptr += len; + + /* handle complete header only */ +@@ -874,7 +873,7 @@ xlog_print_record( + int bad_hdr_warn) + { + xfs_caddr_t buf, ptr; +- int read_len, skip; ++ int read_len, skip, lost_context = 0; + int ret, n, i, j, k; + + if (print_no_print) +@@ -995,7 +994,10 @@ xlog_print_record( + if (xlog_print_find_tid(be32_to_cpu(op_head->oh_tid), + op_head->oh_flags & XLOG_WAS_CONT_TRANS)) { + printf(_("Left over region from split log item\n")); ++ /* Skip this leftover bit */ + ptr += be32_to_cpu(op_head->oh_len); ++ /* We've lost context; don't complain if next one looks bad too */ ++ lost_context = 1; + continue; + } + +@@ -1050,7 +1052,7 @@ xlog_print_record( + break; + } + default: { +- if (bad_hdr_warn) { ++ if (bad_hdr_warn && !lost_context) { + fprintf(stderr, + _("%s: unknown log operation type (%x)\n"), + progname, *(unsigned short *)ptr); +@@ -1064,6 +1066,7 @@ xlog_print_record( + } + skip = 0; + ptr += be32_to_cpu(op_head->oh_len); ++ lost_context = 0; + } + } /* switch */ + } /* else */ +diff --git a/man/man8/xfs_io.8 b/man/man8/xfs_io.8 +index 767b50e..0dec0b7 100644 +--- a/man/man8/xfs_io.8 ++++ b/man/man8/xfs_io.8 +@@ -4,7 +4,7 @@ xfs_io \- debug the I/O path of an XFS filesystem + .SH SYNOPSIS + .B xfs_io + [ +-.B \-adfmrRstx ++.B \-adfmrRstxT + ] [ + .B \-c + .I cmd +@@ -88,7 +88,7 @@ command for more details on any command. + Display a list of all open files and (optionally) switch to an alternate + current open file. + .TP +-.BI "open [[ \-acdfrstR ] " path " ]" ++.BI "open [[ \-acdfrstRT ] " path " ]" + Closes the current file, and opens the file specified by + .I path + instead. Without any arguments, displays statistics about the current +@@ -119,6 +119,14 @@ truncates on open (O_TRUNC). 
+ .B \-n + opens in non-blocking mode if possible (O_NONBLOCK). + .TP ++.B \-T ++create a temporary file not linked into the filesystem namespace ++(O_TMPFILE). The pathname passed must refer to a directory which ++is treated as virtual parent for the newly created invisible file. ++Can not be used together with the ++.B \-r ++option. ++.TP + .B \-R + marks the file as a realtime XFS file after + opening it, if it is not already marked as such. +@@ -380,12 +388,23 @@ will set the FALLOC_FL_KEEP_SIZE flag as described in + .PD + .RE + .TP ++.BI fcollapse " offset length" ++Call fallocate with FALLOC_FL_COLLAPSE_RANGE flag as described in the ++.BR fallocate (2) ++manual page to de-allocates blocks and eliminates the hole created in this process ++by shifting data blocks into the hole. ++.TP + .BI fpunch " offset length" + Punches (de-allocates) blocks in the file by calling fallocate with + the FALLOC_FL_PUNCH_HOLE flag as described in the + .BR fallocate (2) + manual page. + .TP ++.BI fzero " offset length" ++Call fallocate with FALLOC_FL_ZERO_RANGE flag as described in the ++.BR fallocate (2) ++manual page to allocate and zero blocks within the range. ++.TP + .BI truncate " offset" + Truncates the current file at the given offset using + .BR ftruncate (2). +diff --git a/man/man8/xfs_metadump.8 b/man/man8/xfs_metadump.8 +index 4fa1b1c..077fff5 100644 +--- a/man/man8/xfs_metadump.8 ++++ b/man/man8/xfs_metadump.8 +@@ -4,7 +4,7 @@ xfs_metadump \- copy XFS filesystem metadata to a file + .SH SYNOPSIS + .B xfs_metadump + [ +-.B \-efgow ++.B \-efFgow + ] [ + .B \-m + .I max_extents +@@ -86,6 +86,11 @@ file option). This can also happen if an image copy of a filesystem has + been made into an ordinary file with + .BR xfs_copy (8). + .TP ++.B \-F ++Specifies that we want to continue even if the superblock magic is not correct. ++If the source is truly not an XFS filesystem, the resulting image will be useless, ++and xfs_metadump may crash. 
++.TP + .B \-g + Shows dump progress. This is sent to stdout if the + .I target +diff --git a/man/man8/xfs_repair.8 b/man/man8/xfs_repair.8 +index ed20fb7..b7c2d8c 100644 +--- a/man/man8/xfs_repair.8 ++++ b/man/man8/xfs_repair.8 +@@ -144,7 +144,7 @@ reduce repair times on concat based filesystems. + .BI force_geometry + Check the filesystem even if geometry information could not be validated. + Geometry information can not be validated if only a single allocation +-group and exist and thus we do not have a backup superblock available, or ++group exists and thus we do not have a backup superblock available, or + if there are two allocation groups and the two superblocks do not + agree on the filesystem geometry. Only use this option if you validated + the geometry yourself and know what you are doing. If In doubt run +diff --git a/mkfs/proto.c b/mkfs/proto.c +index 4cc0df6..4d3680d 100644 +--- a/mkfs/proto.c ++++ b/mkfs/proto.c +@@ -438,6 +438,7 @@ parseproto( + creds.cr_gid = (int)getnum(pp); + xname.name = (uchar_t *)name; + xname.len = name ? 
strlen(name) : 0; ++ xname.type = 0; + tp = libxfs_trans_alloc(mp, 0); + flags = XFS_ILOG_CORE; + xfs_bmap_init(&flist, &first); +@@ -453,6 +454,7 @@ parseproto( + if (buf) + free(buf); + libxfs_trans_ijoin(tp, pip, 0); ++ xname.type = XFS_DIR3_FT_REG_FILE; + newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); + libxfs_trans_ihold(tp, pip); + break; +@@ -469,6 +471,7 @@ parseproto( + + libxfs_trans_ijoin(tp, pip, 0); + ++ xname.type = XFS_DIR3_FT_REG_FILE; + newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); + libxfs_trans_ihold(tp, pip); + libxfs_trans_log_inode(tp, ip, flags); +@@ -490,6 +493,7 @@ parseproto( + fail(_("Inode allocation failed"), error); + } + libxfs_trans_ijoin(tp, pip, 0); ++ xname.type = XFS_DIR3_FT_BLKDEV; + newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); + libxfs_trans_ihold(tp, pip); + flags |= XFS_ILOG_DEV; +@@ -504,6 +508,7 @@ parseproto( + if (error) + fail(_("Inode allocation failed"), error); + libxfs_trans_ijoin(tp, pip, 0); ++ xname.type = XFS_DIR3_FT_CHRDEV; + newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); + libxfs_trans_ihold(tp, pip); + flags |= XFS_ILOG_DEV; +@@ -516,6 +521,7 @@ parseproto( + if (error) + fail(_("Inode allocation failed"), error); + libxfs_trans_ijoin(tp, pip, 0); ++ xname.type = XFS_DIR3_FT_FIFO; + newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); + libxfs_trans_ihold(tp, pip); + break; +@@ -529,6 +535,7 @@ parseproto( + fail(_("Inode allocation failed"), error); + flags |= newfile(tp, ip, &flist, &first, 1, 1, buf, len); + libxfs_trans_ijoin(tp, pip, 0); ++ xname.type = XFS_DIR3_FT_SYMLINK; + newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); + libxfs_trans_ihold(tp, pip); + break; +@@ -546,6 +553,7 @@ parseproto( + isroot = 1; + } else { + libxfs_trans_ijoin(tp, pip, 0); ++ xname.type = XFS_DIR3_FT_DIR; + newdirent(mp, tp, pip, &xname, ip->i_ino, + &first, &flist); + pip->i_d.di_nlink++; +diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c +index 
d82128c..f7cf394 100644 +--- a/mkfs/xfs_mkfs.c ++++ b/mkfs/xfs_mkfs.c +@@ -2366,32 +2366,40 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"), + } else if (!loginternal && !xi.logdev) { + logblocks = 0; + } else if (loginternal && !logsize) { +- /* +- * With a 2GB max log size, default to maximum size +- * at 4TB. This keeps the same ratio from the older +- * max log size of 128M at 256GB fs size. IOWs, +- * the ratio of fs size to log size is 2048:1. +- */ +- logblocks = (dblocks << blocklog) / 2048; +- logblocks = logblocks >> blocklog; +- logblocks = MAX(min_logblocks, logblocks); + +- /* +- * If the default log size doesn't fit in the AG size, use the +- * minimum log size instead. This ensures small filesystems +- * don't use excessive amounts of space for the log. +- */ +- if (min_logblocks * XFS_DFL_LOG_FACTOR >= agsize) { ++ if (dblocks < GIGABYTES(1, blocklog)) { ++ /* tiny filesystems get minimum sized logs. */ + logblocks = min_logblocks; ++ } else if (dblocks < GIGABYTES(16, blocklog)) { ++ ++ /* ++ * For small filesystems, we want to use the ++ * XFS_MIN_LOG_BYTES for filesystems smaller than 16G if ++ * at all possible, ramping up to 128MB at 256GB. ++ */ ++ logblocks = MIN(XFS_MIN_LOG_BYTES >> blocklog, ++ min_logblocks * XFS_DFL_LOG_FACTOR); + } else { +- logblocks = MAX(logblocks, +- MAX(XFS_DFL_LOG_SIZE, +- min_logblocks * XFS_DFL_LOG_FACTOR)); ++ /* ++ * With a 2GB max log size, default to maximum size ++ * at 4TB. This keeps the same ratio from the older ++ * max log size of 128M at 256GB fs size. IOWs, ++ * the ratio of fs size to log size is 2048:1. 
++ */ ++ logblocks = (dblocks << blocklog) / 2048; ++ logblocks = logblocks >> blocklog; ++ logblocks = MAX(min_logblocks, logblocks); + } ++ ++ /* make sure the log fits wholly within an AG */ ++ if (logblocks >= agsize) ++ logblocks = min_logblocks; ++ ++ /* and now clamp the size to the maximum supported size */ + logblocks = MIN(logblocks, XFS_MAX_LOG_BLOCKS); +- if ((logblocks << blocklog) > XFS_MAX_LOG_BYTES) { ++ if ((logblocks << blocklog) > XFS_MAX_LOG_BYTES) + logblocks = XFS_MAX_LOG_BYTES >> blocklog; +- } ++ + } + validate_log_size(logblocks, blocklog, min_logblocks); + +diff --git a/repair/agheader.c b/repair/agheader.c +index 53e47b6..fc5dac9 100644 +--- a/repair/agheader.c ++++ b/repair/agheader.c +@@ -472,7 +472,7 @@ verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb, + int status = XR_OK; + int status_sb = XR_OK; + +- status = verify_sb(sb, (i == 0)); ++ status = verify_sb(sbuf->b_addr, sb, (i == 0)); + + if (status != XR_OK) { + do_warn(_("bad on-disk superblock %d - %s\n"), +diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c +index d3c2236..afb26e0 100644 +--- a/repair/dino_chunks.c ++++ b/repair/dino_chunks.c +@@ -141,7 +141,7 @@ verify_inode_chunk(xfs_mount_t *mp, + if (check_aginode_block(mp, agno, agino) == 0) + return 0; + +- pthread_mutex_lock(&ag_locks[agno]); ++ pthread_mutex_lock(&ag_locks[agno].lock); + + state = get_bmap(agno, agbno); + switch (state) { +@@ -166,7 +166,7 @@ verify_inode_chunk(xfs_mount_t *mp, + _("inode block %d/%d multiply claimed, (state %d)\n"), + agno, agbno, state); + set_bmap(agno, agbno, XR_E_MULT); +- pthread_mutex_unlock(&ag_locks[agno]); ++ pthread_mutex_unlock(&ag_locks[agno].lock); + return(0); + default: + do_warn( +@@ -176,7 +176,7 @@ verify_inode_chunk(xfs_mount_t *mp, + break; + } + +- pthread_mutex_unlock(&ag_locks[agno]); ++ pthread_mutex_unlock(&ag_locks[agno].lock); + + start_agino = XFS_OFFBNO_TO_AGINO(mp, agbno, 0); + *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino); +@@ 
-424,7 +424,7 @@ verify_inode_chunk(xfs_mount_t *mp, + * user data -- we're probably here as a result of a directory + * entry or an iunlinked pointer + */ +- pthread_mutex_lock(&ag_locks[agno]); ++ pthread_mutex_lock(&ag_locks[agno].lock); + for (cur_agbno = chunk_start_agbno; + cur_agbno < chunk_stop_agbno; + cur_agbno += blen) { +@@ -438,7 +438,7 @@ verify_inode_chunk(xfs_mount_t *mp, + _("inode block %d/%d multiply claimed, (state %d)\n"), + agno, cur_agbno, state); + set_bmap_ext(agno, cur_agbno, blen, XR_E_MULT); +- pthread_mutex_unlock(&ag_locks[agno]); ++ pthread_mutex_unlock(&ag_locks[agno].lock); + return 0; + case XR_E_INO: + do_error( +@@ -449,7 +449,7 @@ verify_inode_chunk(xfs_mount_t *mp, + break; + } + } +- pthread_mutex_unlock(&ag_locks[agno]); ++ pthread_mutex_unlock(&ag_locks[agno].lock); + + /* + * ok, chunk is good. put the record into the tree if required, +@@ -472,7 +472,7 @@ verify_inode_chunk(xfs_mount_t *mp, + + set_inode_used(irec_p, agino - start_agino); + +- pthread_mutex_lock(&ag_locks[agno]); ++ pthread_mutex_lock(&ag_locks[agno].lock); + + for (cur_agbno = chunk_start_agbno; + cur_agbno < chunk_stop_agbno; +@@ -505,7 +505,7 @@ verify_inode_chunk(xfs_mount_t *mp, + break; + } + } +- pthread_mutex_unlock(&ag_locks[agno]); ++ pthread_mutex_unlock(&ag_locks[agno].lock); + + return(ino_cnt); + } +@@ -736,7 +736,7 @@ process_inode_chunk( + /* + * mark block as an inode block in the incore bitmap + */ +- pthread_mutex_lock(&ag_locks[agno]); ++ pthread_mutex_lock(&ag_locks[agno].lock); + state = get_bmap(agno, agbno); + switch (state) { + case XR_E_INO: /* already marked */ +@@ -755,7 +755,7 @@ process_inode_chunk( + XFS_AGB_TO_FSB(mp, agno, agbno), state); + break; + } +- pthread_mutex_unlock(&ag_locks[agno]); ++ pthread_mutex_unlock(&ag_locks[agno].lock); + + for (;;) { + /* +@@ -788,6 +788,8 @@ process_inode_chunk( + * we do now, this is where to start. 
+ */ + if (is_used) { ++ __uint16_t di_mode; ++ + if (is_inode_free(ino_rec, irec_offset)) { + if (verbose || no_modify) { + do_warn( +@@ -803,6 +805,15 @@ process_inode_chunk( + set_inode_used(ino_rec, irec_offset); + + /* ++ * store the on-disk file type for comparing in ++ * phase 6. ++ */ ++ di_mode = be16_to_cpu(dino->di_mode); ++ di_mode = (di_mode & S_IFMT) >> S_SHIFT; ++ set_inode_ftype(ino_rec, irec_offset, ++ xfs_mode_to_ftype[di_mode]); ++ ++ /* + * store on-disk nlink count for comparing in phase 7 + */ + set_inode_disk_nlinks(ino_rec, irec_offset, +@@ -914,7 +925,7 @@ process_inode_chunk( + ibuf_offset = 0; + agbno++; + +- pthread_mutex_lock(&ag_locks[agno]); ++ pthread_mutex_lock(&ag_locks[agno].lock); + state = get_bmap(agno, agbno); + switch (state) { + case XR_E_INO: /* already marked */ +@@ -935,7 +946,7 @@ process_inode_chunk( + XFS_AGB_TO_FSB(mp, agno, agbno), state); + break; + } +- pthread_mutex_unlock(&ag_locks[agno]); ++ pthread_mutex_unlock(&ag_locks[agno].lock); + + } else if (irec_offset == XFS_INODES_PER_CHUNK) { + /* +diff --git a/repair/dinode.c b/repair/dinode.c +index 7469fc8..48f17ac 100644 +--- a/repair/dinode.c ++++ b/repair/dinode.c +@@ -32,6 +32,37 @@ + #include "threads.h" + + /* ++ * gettext lookups for translations of strings use mutexes internally to ++ * the library. Hence when we come through here doing parallel scans in ++ * multiple AGs, then all do concurrent text conversions and serialise ++ * on the translation string lookups. Let's avoid doing repeated lookups ++ * by making them static variables and only assigning the translation ++ * once. 
++ */ ++static char *forkname_data; ++static char *forkname_attr; ++static char *ftype_real_time; ++static char *ftype_regular; ++ ++void ++dinode_bmbt_translation_init(void) ++{ ++ forkname_data = _("data"); ++ forkname_attr = _("attr"); ++ ftype_real_time = _("real-time"); ++ ftype_regular = _("regular"); ++} ++ ++char * ++get_forkname(int whichfork) ++{ ++ ++ if (whichfork == XFS_DATA_FORK) ++ return forkname_data; ++ return forkname_attr; ++} ++ ++/* + * inode clearing routines + */ + +@@ -542,7 +573,7 @@ process_bmbt_reclist_int( + xfs_dfiloff_t op = 0; /* prev offset */ + xfs_dfsbno_t b; + char *ftype; +- char *forkname; ++ char *forkname = get_forkname(whichfork); + int i; + int state; + xfs_agnumber_t agno; +@@ -552,15 +583,10 @@ process_bmbt_reclist_int( + xfs_agnumber_t locked_agno = -1; + int error = 1; + +- if (whichfork == XFS_DATA_FORK) +- forkname = _("data"); +- else +- forkname = _("attr"); +- + if (type == XR_INO_RTDATA) +- ftype = _("real-time"); ++ ftype = ftype_real_time; + else +- ftype = _("regular"); ++ ftype = ftype_regular; + + for (i = 0; i < *numrecs; i++) { + libxfs_bmbt_disk_get_all(rp + i, &irec); +@@ -651,9 +677,10 @@ _("inode %" PRIu64 " - extent offset too large - start %" PRIu64 ", " + } + + if (blkmapp && *blkmapp) { +- error = blkmap_set_ext(blkmapp, irec.br_startoff, ++ int error2; ++ error2 = blkmap_set_ext(blkmapp, irec.br_startoff, + irec.br_startblock, irec.br_blockcount); +- if (error) { ++ if (error2) { + /* + * we don't want to clear the inode due to an + * internal bmap tracking error, but if we've +@@ -665,7 +692,7 @@ _("inode %" PRIu64 " - extent offset too large - start %" PRIu64 ", " + do_abort( + _("Fatal error: inode %" PRIu64 " - blkmap_set_ext(): %s\n" + "\t%s fork, off - %" PRIu64 ", start - %" PRIu64 ", cnt %" PRIu64 "\n"), +- ino, strerror(error), forkname, ++ ino, strerror(error2), forkname, + irec.br_startoff, irec.br_startblock, + irec.br_blockcount); + } +@@ -680,8 +707,8 @@ _("Fatal error: inode %" 
PRIu64 " - blkmap_set_ext(): %s\n" + ebno = agbno + irec.br_blockcount; + if (agno != locked_agno) { + if (locked_agno != -1) +- pthread_mutex_unlock(&ag_locks[locked_agno]); +- pthread_mutex_lock(&ag_locks[agno]); ++ pthread_mutex_unlock(&ag_locks[locked_agno].lock); ++ pthread_mutex_lock(&ag_locks[agno].lock); + locked_agno = agno; + } + +@@ -750,7 +777,7 @@ _("illegal state %d in block map %" PRIu64 "\n"), + error = 0; + done: + if (locked_agno != -1) +- pthread_mutex_unlock(&ag_locks[locked_agno]); ++ pthread_mutex_unlock(&ag_locks[locked_agno].lock); + + if (i != *numrecs) { + ASSERT(i < *numrecs); +@@ -1109,7 +1136,7 @@ process_btinode( + xfs_ino_t lino; + xfs_bmbt_ptr_t *pp; + xfs_bmbt_key_t *pkey; +- char *forkname; ++ char *forkname = get_forkname(whichfork); + int i; + int level; + int numrecs; +@@ -1121,11 +1148,6 @@ process_btinode( + *tot = 0; + *nex = 0; + +- if (whichfork == XFS_DATA_FORK) +- forkname = _("data"); +- else +- forkname = _("attr"); +- + magic = xfs_sb_version_hascrc(&mp->m_sb) ? 
XFS_BMAP_CRC_MAGIC + : XFS_BMAP_MAGIC; + +diff --git a/repair/dinode.h b/repair/dinode.h +index d9197c1..5ee51ca 100644 +--- a/repair/dinode.h ++++ b/repair/dinode.h +@@ -18,9 +18,8 @@ + #ifndef _XR_DINODE_H + #define _XR_DINODE_H + +-#include "prefetch.h" +- + struct blkmap; ++struct prefetch_args; + + int + verify_agbno(xfs_mount_t *mp, +@@ -103,12 +102,12 @@ int + process_uncertain_aginodes(xfs_mount_t *mp, + xfs_agnumber_t agno); + void +-process_aginodes(xfs_mount_t *mp, +- prefetch_args_t *pf_args, +- xfs_agnumber_t agno, +- int check_dirs, +- int check_dups, +- int extra_attr_check); ++process_aginodes(xfs_mount_t *mp, ++ struct prefetch_args *pf_args, ++ xfs_agnumber_t agno, ++ int check_dirs, ++ int check_dups, ++ int extra_attr_check); + + void + check_uncertain_aginodes(xfs_mount_t *mp, +@@ -127,4 +126,7 @@ get_bmapi(xfs_mount_t *mp, + xfs_dfiloff_t bno, + int whichfork ); + ++void dinode_bmbt_translation_init(void); ++char * get_forkname(int whichfork); ++ + #endif /* _XR_DINODE_H */ +diff --git a/repair/dir2.c b/repair/dir2.c +index 3aabcaa..06dc000 100644 +--- a/repair/dir2.c ++++ b/repair/dir2.c +@@ -552,7 +552,7 @@ _("can't read block %u for directory inode %" PRIu64 "\n"), + + newnode = bp->b_addr; + btree = xfs_da3_node_tree_p(newnode); +- xfs_da3_node_hdr_from_disk(&nodehdr, node); ++ xfs_da3_node_hdr_from_disk(&nodehdr, newnode); + /* + * verify magic number and back pointer, sanity-check + * entry count, verify level +diff --git a/repair/globals.h b/repair/globals.h +index aef8b79..f6e0a22 100644 +--- a/repair/globals.h ++++ b/repair/globals.h +@@ -49,7 +49,8 @@ + #define XR_BAD_SB_UNIT 17 /* bad stripe unit */ + #define XR_BAD_SB_WIDTH 18 /* bad stripe width */ + #define XR_BAD_SVN 19 /* bad shared version number */ +-#define XR_BAD_ERR_CODE 20 /* Bad error code */ ++#define XR_BAD_CRC 20 /* Bad CRC */ ++#define XR_BAD_ERR_CODE 21 /* Bad error code */ + + /* XFS filesystem (il)legal values */ + +@@ -186,7 +187,10 @@ EXTERN xfs_extlen_t 
sb_inoalignmt; + EXTERN __uint32_t sb_unit; + EXTERN __uint32_t sb_width; + +-EXTERN pthread_mutex_t *ag_locks; ++struct aglock { ++ pthread_mutex_t lock __attribute__((__aligned__(64))); ++}; ++EXTERN struct aglock *ag_locks; + + EXTERN int report_interval; + EXTERN __uint64_t *prog_rpt_done; +diff --git a/repair/incore.c b/repair/incore.c +index 3590464..a8d497e 100644 +--- a/repair/incore.c ++++ b/repair/incore.c +@@ -294,13 +294,13 @@ init_bmaps(xfs_mount_t *mp) + if (!ag_bmap) + do_error(_("couldn't allocate block map btree roots\n")); + +- ag_locks = calloc(mp->m_sb.sb_agcount, sizeof(pthread_mutex_t)); ++ ag_locks = calloc(mp->m_sb.sb_agcount, sizeof(struct aglock)); + if (!ag_locks) + do_error(_("couldn't allocate block map locks\n")); + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + btree_init(&ag_bmap[i]); +- pthread_mutex_init(&ag_locks[i], NULL); ++ pthread_mutex_init(&ag_locks[i].lock, NULL); + } + + init_rt_bmap(mp); +diff --git a/repair/incore.h b/repair/incore.h +index 38caa6d..5419884 100644 +--- a/repair/incore.h ++++ b/repair/incore.h +@@ -293,6 +293,7 @@ typedef struct ino_tree_node { + ino_ex_data_t *ex_data; /* phases 6,7 */ + parent_list_t *plist; /* phases 2-5 */ + } ino_un; ++ __uint8_t *ftypes; /* phases 3,6 */ + } ino_tree_node_t; + + #define INOS_PER_IREC (sizeof(__uint64_t) * NBBY) +@@ -359,7 +360,8 @@ ino_tree_node_t *find_uncertain_inode_rec(xfs_agnumber_t agno, + xfs_agino_t ino); + void add_inode_uncertain(xfs_mount_t *mp, + xfs_ino_t ino, int free); +-void add_aginode_uncertain(xfs_agnumber_t agno, ++void add_aginode_uncertain(struct xfs_mount *mp, ++ xfs_agnumber_t agno, + xfs_agino_t agino, int free); + void get_uncertain_inode_rec(struct xfs_mount *mp, + xfs_agnumber_t agno, +@@ -476,6 +478,29 @@ static inline void add_inode_reached(struct ino_tree_node *irec, int offset) + } + + /* ++ * get/set inode filetype. Only used if the superblock feature bit is set ++ * which allocates irec->ftypes. 
++ */ ++static inline void ++set_inode_ftype(struct ino_tree_node *irec, ++ int ino_offset, ++ __uint8_t ftype) ++{ ++ if (irec->ftypes) ++ irec->ftypes[ino_offset] = ftype; ++} ++ ++static inline __uint8_t ++get_inode_ftype( ++ struct ino_tree_node *irec, ++ int ino_offset) ++{ ++ if (!irec->ftypes) ++ return XFS_DIR3_FT_UNKNOWN; ++ return irec->ftypes[ino_offset]; ++} ++ ++/* + * set/get inode number of parent -- works for directory inodes only + */ + void set_inode_parent(ino_tree_node_t *irec, int ino_offset, +diff --git a/repair/incore_ino.c b/repair/incore_ino.c +index 735737a..9502648 100644 +--- a/repair/incore_ino.c ++++ b/repair/incore_ino.c +@@ -211,6 +211,21 @@ __uint32_t get_inode_disk_nlinks(struct ino_tree_node *irec, int ino_offset) + return 0; + } + ++static __uint8_t * ++alloc_ftypes_array( ++ struct xfs_mount *mp) ++{ ++ __uint8_t *ptr; ++ ++ if (!xfs_sb_version_hasftype(&mp->m_sb)) ++ return NULL; ++ ++ ptr = calloc(XFS_INODES_PER_CHUNK, sizeof(*ptr)); ++ if (!ptr) ++ do_error(_("could not allocate ftypes array\n")); ++ return ptr; ++} ++ + /* + * Next is the uncertain inode list -- a sorted (in ascending order) + * list of inode records sorted on the starting inode number. 
There +@@ -226,6 +241,7 @@ __uint32_t get_inode_disk_nlinks(struct ino_tree_node *irec, int ino_offset) + */ + static struct ino_tree_node * + alloc_ino_node( ++ struct xfs_mount *mp, + xfs_agino_t starting_ino) + { + struct ino_tree_node *irec; +@@ -245,6 +261,7 @@ alloc_ino_node( + irec->ino_un.ex_data = NULL; + irec->nlink_size = sizeof(__uint8_t); + irec->disk_nlinks.un8 = alloc_nlink_array(irec->nlink_size); ++ irec->ftypes = alloc_ftypes_array(mp); + return irec; + } + +@@ -285,6 +302,7 @@ free_ino_tree_node( + + } + ++ free(irec->ftypes); + free(irec); + } + +@@ -303,7 +321,11 @@ static ino_tree_node_t **last_rec; + * free is set to 1 if the inode is thought to be free, 0 if used + */ + void +-add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) ++add_aginode_uncertain( ++ struct xfs_mount *mp, ++ xfs_agnumber_t agno, ++ xfs_agino_t ino, ++ int free) + { + ino_tree_node_t *ino_rec; + xfs_agino_t s_ino; +@@ -334,7 +356,7 @@ add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) + ino_rec = (ino_tree_node_t *) + avl_findrange(inode_uncertain_tree_ptrs[agno], s_ino); + if (!ino_rec) { +- ino_rec = alloc_ino_node(s_ino); ++ ino_rec = alloc_ino_node(mp, s_ino); + + if (!avl_insert(inode_uncertain_tree_ptrs[agno], + &ino_rec->avl_node)) +@@ -360,7 +382,7 @@ add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) + void + add_inode_uncertain(xfs_mount_t *mp, xfs_ino_t ino, int free) + { +- add_aginode_uncertain(XFS_INO_TO_AGNO(mp, ino), ++ add_aginode_uncertain(mp, XFS_INO_TO_AGNO(mp, ino), + XFS_INO_TO_AGINO(mp, ino), free); + } + +@@ -432,7 +454,7 @@ add_inode( + { + struct ino_tree_node *irec; + +- irec = alloc_ino_node(agino); ++ irec = alloc_ino_node(mp, agino); + if (!avl_insert(inode_tree_ptrs[agno], &irec->avl_node)) + do_warn(_("add_inode - duplicate inode range\n")); + return irec; +diff --git a/repair/init.c b/repair/init.c +index c3f380b..d0940aa 100644 +--- a/repair/init.c ++++ b/repair/init.c +@@ -97,8 
+97,17 @@ xfs_init(libxfs_init_t *args) + else + args->isreadonly = LIBXFS_EXCLUSIVELY; + +- if (!libxfs_init(args)) ++ if (!libxfs_init(args)) { ++ /* would -d be an option? */ ++ if (!no_modify && !dangerously) { ++ args->isreadonly = (LIBXFS_ISINACTIVE | ++ LIBXFS_DANGEROUSLY); ++ if (libxfs_init(args)) ++ fprintf(stderr, ++_("Unmount or use the dangerous (-d) option to repair a read-only mounted filesystem\n")); ++ } + do_error(_("couldn't initialize XFS library\n")); ++ } + + ts_create(); + increase_rlimit(); +diff --git a/repair/phase1.c b/repair/phase1.c +index 62de211..ec75ada 100644 +--- a/repair/phase1.c ++++ b/repair/phase1.c +@@ -70,13 +70,14 @@ phase1(xfs_mount_t *mp) + ag_bp = alloc_ag_buf(MAX_SECTSIZE); + sb = (xfs_sb_t *) ag_bp; + +- if (get_sb(sb, 0LL, MAX_SECTSIZE, 0) == XR_EOF) ++ rval = get_sb(sb, 0LL, MAX_SECTSIZE, 0); ++ if (rval == XR_EOF) + do_error(_("error reading primary superblock\n")); + + /* + * is this really an sb, verify internal consistency + */ +- if ((rval = verify_sb(sb, 1)) != XR_OK) { ++ if (rval != XR_OK) { + do_warn(_("bad primary superblock - %s !!!\n"), + err_string(rval)); + if (!find_secondary_sb(sb)) +diff --git a/repair/phase3.c b/repair/phase3.c +index 3e43938..213d368 100644 +--- a/repair/phase3.c ++++ b/repair/phase3.c +@@ -17,6 +17,8 @@ + */ + + #include ++#include "threads.h" ++#include "prefetch.h" + #include "avl.h" + #include "globals.h" + #include "agheader.h" +@@ -24,9 +26,7 @@ + #include "protos.h" + #include "err_protos.h" + #include "dinode.h" +-#include "threads.h" + #include "progress.h" +-#include "prefetch.h" + + static void + process_agi_unlinked( +@@ -82,41 +82,7 @@ static void + process_ags( + xfs_mount_t *mp) + { +- int i, j; +- xfs_agnumber_t agno; +- work_queue_t *queues; +- prefetch_args_t *pf_args[2]; +- +- queues = malloc(thread_count * sizeof(work_queue_t)); +- +- if (ag_stride) { +- /* +- * create one worker thread for each segment of the volume +- */ +- for (i = 0, agno = 0; i < 
thread_count; i++) { +- create_work_queue(&queues[i], mp, 1); +- pf_args[0] = NULL; +- for (j = 0; j < ag_stride && agno < mp->m_sb.sb_agcount; +- j++, agno++) { +- pf_args[0] = start_inode_prefetch(agno, 0, pf_args[0]); +- queue_work(&queues[i], process_ag_func, agno, pf_args[0]); +- } +- } +- /* +- * wait for workers to complete +- */ +- for (i = 0; i < thread_count; i++) +- destroy_work_queue(&queues[i]); +- } else { +- queues[0].mp = mp; +- pf_args[0] = start_inode_prefetch(0, 0, NULL); +- for (i = 0; i < mp->m_sb.sb_agcount; i++) { +- pf_args[(~i) & 1] = start_inode_prefetch(i + 1, 0, +- pf_args[i & 1]); +- process_ag_func(&queues[0], i, pf_args[i & 1]); +- } +- } +- free(queues); ++ do_inode_prefetch(mp, ag_stride, process_ag_func, false, false); + } + + void +diff --git a/repair/phase4.c b/repair/phase4.c +index a822aaa..189eeb9 100644 +--- a/repair/phase4.c ++++ b/repair/phase4.c +@@ -17,6 +17,8 @@ + */ + + #include ++#include "threads.h" ++#include "prefetch.h" + #include "avl.h" + #include "globals.h" + #include "agheader.h" +@@ -27,9 +29,7 @@ + #include "bmap.h" + #include "versions.h" + #include "dir2.h" +-#include "threads.h" + #include "progress.h" +-#include "prefetch.h" + + + /* +@@ -150,49 +150,7 @@ static void + process_ags( + xfs_mount_t *mp) + { +- int i, j; +- xfs_agnumber_t agno; +- work_queue_t *queues; +- prefetch_args_t *pf_args[2]; +- +- queues = malloc(thread_count * sizeof(work_queue_t)); +- +- if (!libxfs_bcache_overflowed()) { +- queues[0].mp = mp; +- create_work_queue(&queues[0], mp, libxfs_nproc()); +- for (i = 0; i < mp->m_sb.sb_agcount; i++) +- queue_work(&queues[0], process_ag_func, i, NULL); +- destroy_work_queue(&queues[0]); +- } else { +- if (ag_stride) { +- /* +- * create one worker thread for each segment of the volume +- */ +- for (i = 0, agno = 0; i < thread_count; i++) { +- create_work_queue(&queues[i], mp, 1); +- pf_args[0] = NULL; +- for (j = 0; j < ag_stride && agno < mp->m_sb.sb_agcount; +- j++, agno++) { +- pf_args[0] 
= start_inode_prefetch(agno, 0, pf_args[0]); +- queue_work(&queues[i], process_ag_func, agno, pf_args[0]); +- } +- } +- /* +- * wait for workers to complete +- */ +- for (i = 0; i < thread_count; i++) +- destroy_work_queue(&queues[i]); +- } else { +- queues[0].mp = mp; +- pf_args[0] = start_inode_prefetch(0, 0, NULL); +- for (i = 0; i < mp->m_sb.sb_agcount; i++) { +- pf_args[(~i) & 1] = start_inode_prefetch(i + 1, +- 0, pf_args[i & 1]); +- process_ag_func(&queues[0], i, pf_args[i & 1]); +- } +- } +- } +- free(queues); ++ do_inode_prefetch(mp, ag_stride, process_ag_func, true, false); + } + + +diff --git a/repair/phase6.c b/repair/phase6.c +index d2d4a44..446f3ee 100644 +--- a/repair/phase6.c ++++ b/repair/phase6.c +@@ -17,6 +17,8 @@ + */ + + #include ++#include "threads.h" ++#include "prefetch.h" + #include "avl.h" + #include "globals.h" + #include "agheader.h" +@@ -25,9 +27,7 @@ + #include "protos.h" + #include "err_protos.h" + #include "dinode.h" +-#include "prefetch.h" + #include "progress.h" +-#include "threads.h" + #include "versions.h" + + static struct cred zerocr; +@@ -43,13 +43,13 @@ static struct xfs_name xfs_name_dot = {(unsigned char *)".", + * entries are updated. 
These must be rebuilt after the initial pass + */ + typedef struct dotdot_update { +- struct dotdot_update *next; ++ struct list_head list; + ino_tree_node_t *irec; + xfs_agnumber_t agno; + int ino_offset; + } dotdot_update_t; + +-static dotdot_update_t *dotdot_update_list; ++static LIST_HEAD(dotdot_update_list); + static int dotdot_update; + + static void +@@ -64,12 +64,12 @@ add_dotdot_update( + do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"), + sizeof(dotdot_update_t)); + +- dir->next = dotdot_update_list; ++ INIT_LIST_HEAD(&dir->list); + dir->irec = irec; + dir->agno = agno; + dir->ino_offset = ino_offset; + +- dotdot_update_list = dir; ++ list_add(&dir->list, &dotdot_update_list); + } + + /* +@@ -134,7 +134,8 @@ dir_hash_add( + __uint32_t addr, + xfs_ino_t inum, + int namelen, +- unsigned char *name) ++ unsigned char *name, ++ __uint8_t ftype) + { + xfs_dahash_t hash = 0; + int byaddr; +@@ -148,6 +149,7 @@ dir_hash_add( + + xname.name = name; + xname.len = namelen; ++ xname.type = ftype; + + junk = name[0] == '/'; + byaddr = DIR_HASH_FUNC(hashtab, addr); +@@ -312,6 +314,23 @@ dir_hash_see( + return DIR_HASH_CK_NODATA; + } + ++static void ++dir_hash_update_ftype( ++ dir_hash_tab_t *hashtab, ++ xfs_dir2_dataptr_t addr, ++ __uint8_t ftype) ++{ ++ int i; ++ dir_hash_ent_t *p; ++ ++ i = DIR_HASH_FUNC(hashtab, addr); ++ for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) { ++ if (p->address != addr) ++ continue; ++ p->name.type = ftype; ++ } ++} ++ + /* + * checks to make sure leafs match a data entry, and that the stale + * count is valid. +@@ -1685,11 +1704,12 @@ longform_dir2_entry_check_data( + if (!orphanage_ino) + orphanage_ino = inum; + } ++ + /* + * check for duplicate names in directory. 
+ */ + if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen, +- dep->name)) { ++ dep->name, xfs_dir3_dirent_get_ftype(mp, dep))) { + nbad++; + if (entry_junked( + _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), +@@ -1763,6 +1783,35 @@ longform_dir2_entry_check_data( + */ + if (no_modify && verify_inum(mp, inum)) + continue; ++ ++ /* validate ftype field if supported */ ++ if (xfs_sb_version_hasftype(&mp->m_sb)) { ++ __uint8_t dir_ftype; ++ __uint8_t ino_ftype; ++ ++ dir_ftype = xfs_dir3_dirent_get_ftype(mp, dep); ++ ino_ftype = get_inode_ftype(irec, ino_offset); ++ ++ if (dir_ftype != ino_ftype) { ++ if (no_modify) { ++ do_warn( ++ _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), ++ dir_ftype, ino_ftype, ++ ip->i_ino, inum); ++ } else { ++ do_warn( ++ _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), ++ dir_ftype, ino_ftype, ++ ip->i_ino, inum); ++ xfs_dir3_dirent_put_ftype(mp, dep, ++ ino_ftype); ++ libxfs_dir2_data_log_entry(tp, bp, dep); ++ dir_hash_update_ftype(hashtab, addr, ++ ino_ftype); ++ } ++ } ++ } ++ + /* + * check easy case first, regular inode, just bump + * the link count and continue +@@ -2189,6 +2238,62 @@ out_fix: + * shortform directory v2 processing routines -- entry verification and + * bad entry deletion (pruning). 
+ */ ++static struct xfs_dir2_sf_entry * ++shortform_dir2_junk( ++ struct xfs_mount *mp, ++ struct xfs_dir2_sf_hdr *sfp, ++ struct xfs_dir2_sf_entry *sfep, ++ xfs_ino_t lino, ++ int *max_size, ++ int *index, ++ int *bytes_deleted, ++ int *ino_dirty) ++{ ++ struct xfs_dir2_sf_entry *next_sfep; ++ int next_len; ++ int next_elen; ++ ++ if (lino == orphanage_ino) ++ orphanage_ino = 0; ++ ++ next_elen = xfs_dir3_sf_entsize(mp, sfp, sfep->namelen); ++ next_sfep = (xfs_dir2_sf_entry_t *)((__psint_t)sfep + next_elen); ++ ++ /* ++ * if we are just checking, simply return the pointer to the next entry ++ * here so that the checking loop can continue. ++ */ ++ if (no_modify) { ++ do_warn(_("would junk entry\n")); ++ return next_sfep; ++ } ++ ++ /* ++ * now move all the remaining entries down over the junked entry and ++ * clear the newly unused bytes at the tail of the directory region. ++ */ ++ next_len = *max_size - ((__psint_t)next_sfep - (__psint_t)sfp); ++ *max_size -= next_elen; ++ *bytes_deleted += next_elen; ++ ++ memmove(sfep, next_sfep, next_len); ++ memset((void *)((__psint_t)sfep + next_len), 0, next_elen); ++ sfp->count -= 1; ++ *ino_dirty = 1; ++ ++ /* ++ * WARNING: drop the index i by one so it matches the decremented count ++ * for accurate comparisons in the loop test ++ */ ++ (*index)--; ++ ++ if (verbose) ++ do_warn(_("junking entry\n")); ++ else ++ do_warn("\n"); ++ return sfep; ++} ++ + static void + shortform_dir2_entry_check(xfs_mount_t *mp, + xfs_ino_t ino, +@@ -2201,15 +2306,13 @@ shortform_dir2_entry_check(xfs_mount_t *mp, + xfs_ino_t lino; + xfs_ino_t parent; + struct xfs_dir2_sf_hdr *sfp; +- xfs_dir2_sf_entry_t *sfep, *next_sfep, *tmp_sfep; +- xfs_ifork_t *ifp; +- ino_tree_node_t *irec; ++ struct xfs_dir2_sf_entry *sfep; ++ struct xfs_dir2_sf_entry *next_sfep; ++ struct xfs_ifork *ifp; ++ struct ino_tree_node *irec; + int max_size; + int ino_offset; + int i; +- int junkit; +- int tmp_len; +- int tmp_elen; + int bad_sfnamelen; + int namelen; + int 
bytes_deleted; +@@ -2266,9 +2369,7 @@ shortform_dir2_entry_check(xfs_mount_t *mp, + for (i = 0; i < sfp->count && max_size > + (__psint_t)next_sfep - (__psint_t)sfp; + sfep = next_sfep, i++) { +- junkit = 0; + bad_sfnamelen = 0; +- tmp_sfep = NULL; + + lino = xfs_dir3_sfe_get_ino(mp, sfp, sfep); + +@@ -2340,7 +2441,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, + do_warn( + _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"), + fname, ino, lino); +- goto do_junkit; ++ next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, ++ &max_size, &i, &bytes_deleted, ++ ino_dirty); ++ continue; + } + + ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum; +@@ -2354,7 +2458,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, + do_warn( + _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"), + fname, ino, lino); +- goto do_junkit; ++ next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, ++ &max_size, &i, &bytes_deleted, ++ ino_dirty); ++ continue; + } + /* + * check if this inode is lost+found dir in the root +@@ -2367,7 +2474,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, + do_warn( + _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"), + ORPHANAGE, lino, ino); +- goto do_junkit; ++ next_sfep = shortform_dir2_junk(mp, sfp, sfep, ++ lino, &max_size, &i, ++ &bytes_deleted, ino_dirty); ++ continue; + } + /* + * if this is a dup, it will be picked up below, +@@ -2381,11 +2491,15 @@ shortform_dir2_entry_check(xfs_mount_t *mp, + */ + if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t) + (sfep - xfs_dir2_sf_firstentry(sfp)), +- lino, sfep->namelen, sfep->name)) { ++ lino, sfep->namelen, sfep->name, ++ xfs_dir3_sfe_get_ftype(mp, sfp, sfep))) { + do_warn( + _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), + fname, lino, ino); +- goto do_junkit; ++ next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, ++ &max_size, &i, &bytes_deleted, ++ ino_dirty); ++ 
continue; + } + + if (!inode_isadir(irec, ino_offset)) { +@@ -2403,11 +2517,14 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), + * the .. in the child, blow out the entry + */ + if (is_inode_reached(irec, ino_offset)) { +- junkit = 1; + do_warn( + _("entry \"%s\" in directory inode %" PRIu64 + " references already connected inode %" PRIu64 ".\n"), + fname, ino, lino); ++ next_sfep = shortform_dir2_junk(mp, sfp, sfep, ++ lino, &max_size, &i, ++ &bytes_deleted, ino_dirty); ++ continue; + } else if (parent == ino) { + add_inode_reached(irec, ino_offset); + add_inode_ref(current_irec, current_ino_offset); +@@ -2423,76 +2540,60 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), + add_dotdot_update(XFS_INO_TO_AGNO(mp, lino), + irec, ino_offset); + } else { +- junkit = 1; + do_warn( + _("entry \"%s\" in directory inode %" PRIu64 + " not consistent with .. value (%" PRIu64 + ") in inode %" PRIu64 ",\n"), + fname, ino, parent, lino); ++ next_sfep = shortform_dir2_junk(mp, sfp, sfep, ++ lino, &max_size, &i, ++ &bytes_deleted, ino_dirty); ++ continue; + } + } + +- if (junkit) { +-do_junkit: +- if (lino == orphanage_ino) +- orphanage_ino = 0; +- if (!no_modify) { +- tmp_elen = xfs_dir3_sf_entsize(mp, sfp, +- sfep->namelen); +- tmp_sfep = (xfs_dir2_sf_entry_t *) +- ((__psint_t) sfep + tmp_elen); +- tmp_len = max_size - ((__psint_t) tmp_sfep +- - (__psint_t) sfp); +- max_size -= tmp_elen; +- bytes_deleted += tmp_elen; +- +- memmove(sfep, tmp_sfep, tmp_len); +- +- sfp->count -= 1; +- memset((void *)((__psint_t)sfep + tmp_len), 0, +- tmp_elen); ++ /* validate ftype field if supported */ ++ if (xfs_sb_version_hasftype(&mp->m_sb)) { ++ __uint8_t dir_ftype; ++ __uint8_t ino_ftype; + +- /* +- * set the tmp value to the current +- * pointer so we'll process the entry +- * we just moved up +- */ +- tmp_sfep = sfep; +- +- /* +- * WARNING: drop the index i by one +- * so it matches the decremented count for +- * accurate 
comparisons in the loop test +- */ +- i--; +- +- *ino_dirty = 1; ++ dir_ftype = xfs_dir3_sfe_get_ftype(mp, sfp, sfep); ++ ino_ftype = get_inode_ftype(irec, ino_offset); + +- if (verbose) +- do_warn(_("junking entry\n")); +- else +- do_warn("\n"); +- } else { +- do_warn(_("would junk entry\n")); ++ if (dir_ftype != ino_ftype) { ++ if (no_modify) { ++ do_warn( ++ _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), ++ dir_ftype, ino_ftype, ++ ino, lino); ++ } else { ++ do_warn( ++ _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), ++ dir_ftype, ino_ftype, ++ ino, lino); ++ xfs_dir3_sfe_put_ftype(mp, sfp, sfep, ++ ino_ftype); ++ dir_hash_update_ftype(hashtab, ++ (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)), ++ ino_ftype); ++ *ino_dirty = 1; ++ } + } +- } else if (lino > XFS_DIR2_MAX_SHORT_INUM) ++ } ++ ++ if (lino > XFS_DIR2_MAX_SHORT_INUM) + i8++; + + /* +- * go onto next entry unless we've just junked an +- * entry in which the current entry pointer points +- * to an unprocessed entry. have to take into entries +- * with bad namelen into account in no modify mode since we +- * calculate size based on next_sfep. ++ * go onto next entry - we have to take entries with bad namelen ++ * into account in no modify mode since we calculate size based ++ * on next_sfep. + */ + ASSERT(no_modify || bad_sfnamelen == 0); +- +- next_sfep = (tmp_sfep == NULL) +- ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep +- + ((!bad_sfnamelen) +- ? xfs_dir3_sf_entsize(mp, sfp, sfep->namelen) +- : xfs_dir3_sf_entsize(mp, sfp, namelen))) +- : tmp_sfep; ++ next_sfep = (struct xfs_dir2_sf_entry *)((__psint_t)sfep + ++ (bad_sfnamelen ++ ? 
xfs_dir3_sf_entsize(mp, sfp, namelen) ++ : xfs_dir3_sf_entsize(mp, sfp, sfep->namelen))); + } + + if (sfp->i8count != i8) { +@@ -2501,6 +2602,8 @@ do_junkit: + ino); + } else { + if (i8 == 0) { ++ struct xfs_dir2_sf_entry *tmp_sfep; ++ + tmp_sfep = next_sfep; + process_sf_dir2_fixi8(mp, sfp, &tmp_sfep); + bytes_deleted += +@@ -2518,8 +2621,7 @@ do_junkit: + /* + * sync up sizes if required + */ +- if (*ino_dirty) { +- ASSERT(bytes_deleted > 0); ++ if (*ino_dirty && bytes_deleted > 0) { + ASSERT(!no_modify); + libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK); + ip->i_d.di_size -= bytes_deleted; +@@ -2897,8 +2999,15 @@ traverse_function( + if (irec->ino_isa_dir == 0) + continue; + +- if (pf_args) ++ if (pf_args) { + sem_post(&pf_args->ra_count); ++#ifdef XR_PF_TRACE ++ sem_getvalue(&pf_args->ra_count, &i); ++ pftrace( ++ "processing inode chunk %p in AG %d (sem count = %d)", ++ irec, agno, i); ++#endif ++ } + + for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { + if (inode_isadir(irec, i)) +@@ -2919,9 +3028,10 @@ update_missing_dotdot_entries( + * set dotdot_update flag so processing routines do not count links + */ + dotdot_update = 1; +- while (dotdot_update_list) { +- dir = dotdot_update_list; +- dotdot_update_list = dir->next; ++ while (!list_empty(&dotdot_update_list)) { ++ dir = list_entry(dotdot_update_list.prev, struct dotdot_update, ++ list); ++ list_del(&dir->list); + process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset); + free(dir); + } +@@ -2929,23 +3039,9 @@ update_missing_dotdot_entries( + + static void + traverse_ags( +- xfs_mount_t *mp) ++ struct xfs_mount *mp) + { +- int i; +- work_queue_t queue; +- prefetch_args_t *pf_args[2]; +- +- /* +- * we always do prefetch for phase 6 as it will fill in the gaps +- * not read during phase 3 prefetch. 
+- */ +- queue.mp = mp; +- pf_args[0] = start_inode_prefetch(0, 1, NULL); +- for (i = 0; i < glob_agcount; i++) { +- pf_args[(~i) & 1] = start_inode_prefetch(i + 1, 1, +- pf_args[i & 1]); +- traverse_function(&queue, i, pf_args[i & 1]); +- } ++ do_inode_prefetch(mp, 0, traverse_function, false, true); + } + + void +diff --git a/repair/prefetch.c b/repair/prefetch.c +index d3491da..e47a48e 100644 +--- a/repair/prefetch.c ++++ b/repair/prefetch.c +@@ -105,11 +105,12 @@ pf_start_io_workers( + static void + pf_queue_io( + prefetch_args_t *args, +- xfs_fsblock_t fsbno, +- int blen, ++ struct xfs_buf_map *map, ++ int nmaps, + int flag) + { +- xfs_buf_t *bp; ++ struct xfs_buf *bp; ++ xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, map[0].bm_bn); + + /* + * Never block on a buffer lock here, given that the actual repair +@@ -117,8 +118,7 @@ pf_queue_io( + * the lock holder is either reading it from disk himself or + * completely overwriting it this behaviour is perfectly fine. + */ +- bp = libxfs_getbuf_flags(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), +- XFS_FSB_TO_BB(mp, blen), LIBXFS_GETBUF_TRYLOCK); ++ bp = libxfs_getbuf_map(mp->m_dev, map, nmaps, LIBXFS_GETBUF_TRYLOCK); + if (!bp) + return; + +@@ -167,6 +167,14 @@ pf_read_bmbt_reclist( + xfs_bmbt_irec_t irec; + xfs_dfilblks_t cp = 0; /* prev count */ + xfs_dfiloff_t op = 0; /* prev offset */ ++#define MAP_ARRAY_SZ 4 ++ struct xfs_buf_map map_array[MAP_ARRAY_SZ]; ++ struct xfs_buf_map *map = map_array; ++ int max_extents = MAP_ARRAY_SZ; ++ int nmaps = 0;; ++ unsigned int len = 0; ++ int ret = 0; ++ + + for (i = 0; i < numrecs; i++) { + libxfs_bmbt_disk_get_all(rp + i, &irec); +@@ -174,11 +182,11 @@ pf_read_bmbt_reclist( + if (((i > 0) && (op + cp > irec.br_startoff)) || + (irec.br_blockcount == 0) || + (irec.br_startoff >= fs_max_file_offset)) +- return 0; ++ goto out_free; + + if (!verify_dfsbno(mp, irec.br_startblock) || !verify_dfsbno(mp, + irec.br_startblock + irec.br_blockcount - 1)) +- return 0; ++ goto out_free; + + if 
(!args->dirs_only && ((irec.br_startoff + + irec.br_blockcount) >= mp->m_dirfreeblk)) +@@ -188,18 +196,59 @@ pf_read_bmbt_reclist( + cp = irec.br_blockcount; + + while (irec.br_blockcount) { +- unsigned int len; ++ unsigned int bm_len; + + pftrace("queuing dir extent in AG %d", args->agno); + +- len = (irec.br_blockcount > mp->m_dirblkfsbs) ? +- mp->m_dirblkfsbs : irec.br_blockcount; +- pf_queue_io(args, irec.br_startblock, len, B_DIR_META); +- irec.br_blockcount -= len; +- irec.br_startblock += len; ++ if (len + irec.br_blockcount >= mp->m_dirblkfsbs) ++ bm_len = mp->m_dirblkfsbs - len; ++ else ++ bm_len = irec.br_blockcount; ++ len += bm_len; ++ ++ map[nmaps].bm_bn = XFS_FSB_TO_DADDR(mp, ++ irec.br_startblock); ++ map[nmaps].bm_len = XFS_FSB_TO_BB(mp, bm_len); ++ nmaps++; ++ ++ if (len == mp->m_dirblkfsbs) { ++ pf_queue_io(args, map, nmaps, B_DIR_META); ++ len = 0; ++ nmaps = 0; ++ } ++ ++ irec.br_blockcount -= bm_len; ++ irec.br_startblock += bm_len; ++ ++ /* ++ * Handle very fragmented dir2 blocks with dynamically ++ * allocated buffer maps. ++ */ ++ if (nmaps >= max_extents) { ++ struct xfs_buf_map *old_map = NULL; ++ ++ if (map == map_array) { ++ old_map = map; ++ map = NULL; ++ } ++ max_extents *= 2; ++ map = realloc(map, max_extents * sizeof(*map)); ++ if (map == NULL) { ++ do_error( ++ _("couldn't malloc dir2 buffer list\n")); ++ exit(1); ++ } ++ if (old_map) ++ memcpy(map, old_map, sizeof(map_array)); ++ } ++ + } + } +- return 1; ++ ret = 1; ++out_free: ++ if (map != map_array) ++ free(map); ++ return ret; + } + + /* +@@ -249,7 +298,8 @@ pf_scanfunc_bmap( + /* + * do some validation on the block contents + */ +- if ((be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC) || ++ if ((block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) && ++ block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC)) || + (be16_to_cpu(block->bb_level) != level)) + return 0; + +@@ -395,9 +445,28 @@ pf_read_inode_dirs( + } + + /* +- * pf_batch_read must be called with the lock locked. 
++ * Discontiguous buffers require multiple IOs to fill, so we can't use any ++ * linearising, hole filling algorithms on them to avoid seeks. Just remove them ++ * from the prefetch queue and read them straight into the cache and release ++ * them. + */ ++static void ++pf_read_discontig( ++ struct prefetch_args *args, ++ struct xfs_buf *bp) ++{ ++ if (!btree_delete(args->io_queue, XFS_DADDR_TO_FSB(mp, bp->b_bn))) ++ do_error(_("prefetch corruption\n")); ++ ++ pthread_mutex_unlock(&args->lock); ++ libxfs_readbufr_map(mp->m_ddev_targp, bp, 0); ++ libxfs_putbuf(bp); ++ pthread_mutex_lock(&args->lock); ++} + ++/* ++ * pf_batch_read must be called with the lock locked. ++ */ + static void + pf_batch_read( + prefetch_args_t *args, +@@ -426,8 +495,15 @@ pf_batch_read( + max_fsbno = fsbno + pf_max_fsbs; + } + while (bplist[num] && num < MAX_BUFS && fsbno < max_fsbno) { +- if (which != PF_META_ONLY || +- !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num]))) ++ /* ++ * Handle discontiguous buffers outside the seek ++ * optimised IO loop below. ++ */ ++ if ((bplist[num]->b_flags & LIBXFS_B_DISCONTIG)) { ++ pf_read_discontig(args, bplist[num]); ++ bplist[num] = NULL; ++ } else if (which != PF_META_ONLY || ++ !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num]))) + num++; + if (num == MAX_BUFS) + break; +@@ -648,7 +724,7 @@ pf_queuing_worker( + irec, args->agno, i); + #endif + err = sem_trywait(&args->ra_count); +- if (err == EAGAIN) { ++ if (err < 0 && errno == EAGAIN) { + /* + * Kick the queue once we have reached the limit; + * without this the threads processing the inodes +@@ -664,10 +740,13 @@ pf_queuing_worker( + bno = XFS_AGINO_TO_AGBNO(mp, cur_irec->ino_startnum); + + do { +- pf_queue_io(args, XFS_AGB_TO_FSB(mp, args->agno, bno), +- blks_per_cluster, +- (cur_irec->ino_isa_dir != 0) ?
+- B_DIR_INODE : B_INODE); ++ struct xfs_buf_map map; ++ ++ map.bm_bn = XFS_AGB_TO_DADDR(mp, args->agno, bno); ++ map.bm_len = XFS_FSB_TO_BB(mp, blks_per_cluster); ++ pf_queue_io(args, &map, 1, ++ (cur_irec->ino_isa_dir != 0) ? B_DIR_INODE ++ : B_INODE); + bno += blks_per_cluster; + num_inos += inodes_per_cluster; + } while (num_inos < XFS_IALLOC_INODES(mp)); +@@ -787,6 +866,140 @@ start_inode_prefetch( + return args; + } + ++/* ++ * prefetch_ag_range runs a prefetch-and-process loop across a range of AGs. It ++ * begins with @start_ag, and finishes with @end_ag - 1 (i.e. does not prefetch ++ * or process @end_ag). The function starts prefetch on the first AG, then loops ++ * starting prefetch on the next AG and then blocks processing the current AG as ++ * the prefetch queue brings inodes into the processing queue. ++ * ++ * There is only one prefetch taking place at a time, so the prefetch on the ++ * next AG only starts once the current AG has been completely prefetched. Hence ++ * the prefetch of the next AG will start some time before the processing of the ++ * current AG finishes, ensuring that when we iterate and start processing the ++ * next AG there is already a significant queue of inodes to process. ++ * ++ * Prefetch is done this way to prevent it from running too far ahead of the ++ * processing. Allowing it to do so can cause cache thrashing, where new ++ * prefetch causes previously prefetched buffers to be reclaimed before the ++ * processing thread uses them. This results in reading all the inodes and ++ * metadata twice per phase and it greatly slows down the processing. Hence we ++ * have to carefully control how far ahead we prefetch...
++ */ ++static void ++prefetch_ag_range( ++ struct work_queue *work, ++ xfs_agnumber_t start_ag, ++ xfs_agnumber_t end_ag, ++ bool dirs_only, ++ void (*func)(struct work_queue *, ++ xfs_agnumber_t, void *)) ++{ ++ int i; ++ struct prefetch_args *pf_args[2]; ++ ++ pf_args[start_ag & 1] = start_inode_prefetch(start_ag, dirs_only, NULL); ++ for (i = start_ag; i < end_ag; i++) { ++ /* Don't prefetch end_ag */ ++ if (i + 1 < end_ag) ++ pf_args[(~i) & 1] = start_inode_prefetch(i + 1, ++ dirs_only, pf_args[i & 1]); ++ func(work, i, pf_args[i & 1]); ++ } ++} ++ ++struct pf_work_args { ++ xfs_agnumber_t start_ag; ++ xfs_agnumber_t end_ag; ++ bool dirs_only; ++ void (*func)(struct work_queue *, xfs_agnumber_t, void *); ++}; ++ ++static void ++prefetch_ag_range_work( ++ struct work_queue *work, ++ xfs_agnumber_t unused, ++ void *args) ++{ ++ struct pf_work_args *wargs = args; ++ ++ prefetch_ag_range(work, wargs->start_ag, wargs->end_ag, ++ wargs->dirs_only, wargs->func); ++ free(args); ++} ++ ++/* ++ * Do inode prefetch in the most optimal way for the context under which repair ++ * has been run. ++ */ ++void ++do_inode_prefetch( ++ struct xfs_mount *mp, ++ int stride, ++ void (*func)(struct work_queue *, ++ xfs_agnumber_t, void *), ++ bool check_cache, ++ bool dirs_only) ++{ ++ int i; ++ struct work_queue queue; ++ struct work_queue *queues; ++ ++ /* ++ * If the previous phases of repair have not overflowed the buffer ++ * cache, then we don't need to re-read any of the metadata in the ++ * filesystem - it's all in the cache. In that case, run a thread per ++ * CPU to maximise parallelism of the queue to be processed. 
++ */ ++ if (check_cache && !libxfs_bcache_overflowed()) { ++ queue.mp = mp; ++ create_work_queue(&queue, mp, libxfs_nproc()); ++ for (i = 0; i < mp->m_sb.sb_agcount; i++) ++ queue_work(&queue, func, i, NULL); ++ destroy_work_queue(&queue); ++ return; ++ } ++ ++ /* ++ * single threaded behaviour - single prefetch thread, processed ++ * directly after each AG is queued. ++ */ ++ if (!stride) { ++ queue.mp = mp; ++ prefetch_ag_range(&queue, 0, mp->m_sb.sb_agcount, ++ dirs_only, func); ++ return; ++ } ++ ++ /* ++ * create one worker thread for each segment of the volume ++ */ ++ queues = malloc(thread_count * sizeof(work_queue_t)); ++ for (i = 0; i < thread_count; i++) { ++ struct pf_work_args *wargs; ++ ++ wargs = malloc(sizeof(struct pf_work_args)); ++ wargs->start_ag = i * stride; ++ wargs->end_ag = min((i + 1) * stride, ++ mp->m_sb.sb_agcount); ++ wargs->dirs_only = dirs_only; ++ wargs->func = func; ++ ++ create_work_queue(&queues[i], mp, 1); ++ queue_work(&queues[i], prefetch_ag_range_work, 0, wargs); ++ ++ if (wargs->end_ag >= mp->m_sb.sb_agcount) ++ break; ++ } ++ ++ /* ++ * wait for workers to complete ++ */ ++ while (i--) ++ destroy_work_queue(&queues[i]); ++ free(queues); ++} ++ + void + wait_for_inode_prefetch( + prefetch_args_t *args) +diff --git a/repair/prefetch.h b/repair/prefetch.h +index 44a406c..b837752 100644 +--- a/repair/prefetch.h ++++ b/repair/prefetch.h +@@ -4,6 +4,7 @@ + #include + #include "incore.h" + ++struct work_queue; + + extern int do_prefetch; + +@@ -41,6 +42,15 @@ start_inode_prefetch( + prefetch_args_t *prev_args); + + void ++do_inode_prefetch( ++ struct xfs_mount *mp, ++ int stride, ++ void (*func)(struct work_queue *, ++ xfs_agnumber_t, void *), ++ bool check_cache, ++ bool dirs_only); ++ ++void + wait_for_inode_prefetch( + prefetch_args_t *args); + +diff --git a/repair/protos.h b/repair/protos.h +index 601f2a9..ff42fa7 100644 +--- a/repair/protos.h ++++ b/repair/protos.h +@@ -18,7 +18,8 @@ + + void xfs_init(libxfs_init_t *args); + 
+-int verify_sb(xfs_sb_t *sb, ++int verify_sb(char *sb_buf, ++ xfs_sb_t *sb, + int is_primary_sb); + int verify_set_primary_sb(xfs_sb_t *root_sb, + int sb_index, +diff --git a/repair/sb.c b/repair/sb.c +index c54d89b..b111aca 100644 +--- a/repair/sb.c ++++ b/repair/sb.c +@@ -139,7 +139,7 @@ find_secondary_sb(xfs_sb_t *rsb) + c_bufsb = (char *)sb + i; + libxfs_sb_from_disk(&bufsb, (xfs_dsb_t *)c_bufsb); + +- if (verify_sb(&bufsb, 0) != XR_OK) ++ if (verify_sb(c_bufsb, &bufsb, 0) != XR_OK) + continue; + + do_warn(_("found candidate secondary superblock...\n")); +@@ -245,7 +245,7 @@ sb_validate_ino_align(struct xfs_sb *sb) + */ + + int +-verify_sb(xfs_sb_t *sb, int is_primary_sb) ++verify_sb(char *sb_buf, xfs_sb_t *sb, int is_primary_sb) + { + __uint32_t bsize; + int i; +@@ -263,8 +263,34 @@ verify_sb(xfs_sb_t *sb, int is_primary_sb) + if (is_primary_sb && sb->sb_inprogress == 1) + return(XR_BAD_INPROGRESS); + +- /* check to make sure blocksize is legal 2^N, 9 <= N <= 16 */ ++ /* ++ * before going *any further*, validate the sector size and if the ++ * version says we should have CRCs enabled, validate that. 
++ */ ++ ++ /* check to make sure sectorsize is legal 2^N, 9 <= N <= 15 */ ++ if (sb->sb_sectsize == 0) ++ return(XR_BAD_SECT_SIZE_DATA); ++ ++ bsize = 1; ++ for (i = 0; bsize < sb->sb_sectsize && ++ i < sizeof(sb->sb_sectsize) * NBBY; i++) { ++ bsize <<= 1; ++ } ++ ++ if (i < XFS_MIN_SECTORSIZE_LOG || i > XFS_MAX_SECTORSIZE_LOG) ++ return(XR_BAD_SECT_SIZE_DATA); ++ ++ /* check sb sectorsize field against sb sectlog field */ ++ if (i != sb->sb_sectlog) ++ return(XR_BAD_SECT_SIZE_DATA); ++ ++ /* sector size in range - CRC check time */ ++ if (xfs_sb_version_hascrc(sb) && ++ !xfs_verify_cksum(sb_buf, sb->sb_sectsize, XFS_SB_CRC_OFF)) ++ return XR_BAD_CRC; + ++ /* check to make sure blocksize is legal 2^N, 9 <= N <= 16 */ + if (sb->sb_blocksize == 0) + return(XR_BAD_BLOCKSIZE); + +@@ -300,26 +326,6 @@ verify_sb(xfs_sb_t *sb, int is_primary_sb) + sb->sb_inopblock != howmany(sb->sb_blocksize,sb->sb_inodesize)) + return(XR_BAD_INO_SIZE_DATA); + +- /* check to make sure sectorsize is legal 2^N, 9 <= N <= 15 */ +- +- if (sb->sb_sectsize == 0) +- return(XR_BAD_SECT_SIZE_DATA); +- +- bsize = 1; +- +- for (i = 0; bsize < sb->sb_sectsize && +- i < sizeof(sb->sb_sectsize) * NBBY; i++) { +- bsize <<= 1; +- } +- +- if (i < XFS_MIN_SECTORSIZE_LOG || i > XFS_MAX_SECTORSIZE_LOG) +- return(XR_BAD_SECT_SIZE_DATA); +- +- /* check sb sectorsize field against sb sectlog field */ +- +- if (i != sb->sb_sectlog) +- return(XR_BAD_SECT_SIZE_DATA); +- + if (xfs_sb_version_hassector(sb)) { + + /* check to make sure log sector is legal 2^N, 9 <= N <= 15 */ +@@ -482,9 +488,11 @@ write_primary_sb(xfs_sb_t *sbp, int size) + do_error(_("couldn't seek to offset 0 in filesystem\n")); + } + +- + libxfs_sb_to_disk(buf, sbp, XFS_SB_ALL_BITS); + ++ if (xfs_sb_version_hascrc(sbp)) ++ xfs_update_cksum((char *)buf, size, XFS_SB_CRC_OFF); ++ + if (write(x.dfd, buf, size) != size) { + free(buf); + do_error(_("primary superblock write failed!\n")); +@@ -494,7 +502,7 @@ write_primary_sb(xfs_sb_t *sbp, int size) 
+ } + + /* +- * get a possible superblock -- don't check for internal consistency ++ * get a possible superblock -- checks for internal consistency + */ + int + get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno) +@@ -529,9 +537,10 @@ get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno) + do_error("%s\n", strerror(error)); + } + libxfs_sb_from_disk(sbp, buf); +- free(buf); + +- return (verify_sb(sbp, 0)); ++ rval = verify_sb((char *)buf, sbp, agno == 0); ++ free(buf); ++ return rval; + } + + /* returns element on list with highest reference count */ +@@ -745,13 +754,11 @@ verify_set_primary_sb(xfs_sb_t *rsb, + off = (xfs_off_t)agno * rsb->sb_agblocks << rsb->sb_blocklog; + + checked[agno] = 1; +- +- if (get_sb(sb, off, size, agno) == XR_EOF) { +- retval = XR_EOF; ++ retval = get_sb(sb, off, size, agno); ++ if (retval == XR_EOF) + goto out_free_list; +- } + +- if (verify_sb(sb, 0) == XR_OK) { ++ if (retval == XR_OK) { + /* + * save away geometry info. + * don't bother checking the sb +diff --git a/repair/scan.c b/repair/scan.c +index 49ed194..1744c32 100644 +--- a/repair/scan.c ++++ b/repair/scan.c +@@ -171,17 +171,12 @@ scan_bmapbt( + xfs_bmbt_rec_t *rp; + xfs_dfiloff_t first_key; + xfs_dfiloff_t last_key; +- char *forkname; ++ char *forkname = get_forkname(whichfork); + int numrecs; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + int state; + +- if (whichfork == XFS_DATA_FORK) +- forkname = _("data"); +- else +- forkname = _("attr"); +- + /* + * unlike the ag freeblock btrees, if anything looks wrong + * in an inode bmap tree, just bail. 
it's possible that +@@ -273,7 +268,7 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n" + agno = XFS_FSB_TO_AGNO(mp, bno); + agbno = XFS_FSB_TO_AGBNO(mp, bno); + +- pthread_mutex_lock(&ag_locks[agno]); ++ pthread_mutex_lock(&ag_locks[agno].lock); + state = get_bmap(agno, agbno); + switch (state) { + case XR_E_UNKNOWN: +@@ -319,7 +314,7 @@ _("bad state %d, inode %" PRIu64 " bmap block 0x%" PRIx64 "\n"), + state, ino, bno); + break; + } +- pthread_mutex_unlock(&ag_locks[agno]); ++ pthread_mutex_unlock(&ag_locks[agno].lock); + } else { + /* + * attribute fork for realtime files is in the regular +@@ -866,9 +861,9 @@ _("inode rec for ino %" PRIu64 " (%d/%d) overlaps existing rec (start %d/%d)\n") + for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { + if (XFS_INOBT_IS_FREE_DISK(rp, j)) { + nfree++; +- add_aginode_uncertain(agno, ino + j, 1); ++ add_aginode_uncertain(mp, agno, ino + j, 1); + } else { +- add_aginode_uncertain(agno, ino + j, 0); ++ add_aginode_uncertain(mp, agno, ino + j, 0); + } + } + } +@@ -1229,7 +1224,6 @@ scan_ag( + do_error(_("can't get root superblock for ag %d\n"), agno); + return; + } +- + sb = (xfs_sb_t *)calloc(BBSIZE, 1); + if (!sb) { + do_error(_("can't allocate memory for superblock\n")); +diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c +index 7beffcb..08b25f0 100644 +--- a/repair/xfs_repair.c ++++ b/repair/xfs_repair.c +@@ -29,6 +29,7 @@ + #include "prefetch.h" + #include "threads.h" + #include "progress.h" ++#include "dinode.h" + + #define rounddown(x, y) (((x)/(y))*(y)) + +@@ -136,6 +137,8 @@ err_string(int err_code) + _("bad stripe width in superblock"); + err_message[XR_BAD_SVN] = + _("bad shared version number in superblock"); ++ err_message[XR_BAD_CRC] = ++ _("bad CRC in superblock"); + done = 1; + } + +@@ -528,11 +531,14 @@ main(int argc, char **argv) + xfs_buf_t *sbp; + xfs_mount_t xfs_m; + char *msgbuf; ++ struct xfs_sb psb; ++ int rval; + + progname = basename(argv[0]); + setlocale(LC_ALL, ""); + 
bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); ++ dinode_bmbt_translation_init(); + + temp_mp = &xfs_m; + setbuf(stdout, NULL); +@@ -556,13 +562,12 @@ main(int argc, char **argv) + exit(1); + } + +- /* prepare the mount structure */ +- memset(&xfs_m, 0, sizeof(xfs_mount_t)); +- libxfs_buftarg_init(&xfs_m, x.ddev, x.logdev, x.rtdev); +- sbp = libxfs_readbuf(xfs_m.m_ddev_targp, XFS_SB_DADDR, +- 1 << (XFS_MAX_SECTORSIZE_LOG - BBSHIFT), 0, +- &xfs_sb_buf_ops); +- libxfs_sb_from_disk(&xfs_m.m_sb, XFS_BUF_TO_SBP(sbp)); ++ rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0); ++ if (rval != XR_OK) { ++ do_warn(_("Primary superblock bad after phase 1!\n" ++ "Exiting now.\n")); ++ exit(1); ++ } + + /* + * if the sector size of the filesystem we are trying to repair is +@@ -581,7 +586,7 @@ main(int argc, char **argv) + geom.sectsize = BBSIZE; + } + +- if (xfs_m.m_sb.sb_sectsize < geom.sectsize) { ++ if (psb.sb_sectsize < geom.sectsize) { + long old_flags; + + old_flags = fcntl(fd, F_GETFL, 0); +@@ -593,7 +598,10 @@ main(int argc, char **argv) + } + } + } +- mp = libxfs_mount(&xfs_m, &xfs_m.m_sb, x.ddev, x.logdev, x.rtdev, 0); ++ ++ /* prepare the mount structure */ ++ memset(&xfs_m, 0, sizeof(xfs_mount_t)); ++ mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0); + + if (!mp) { + fprintf(stderr, +@@ -601,8 +609,6 @@ main(int argc, char **argv) + progname); + exit(1); + } +- libxfs_putbuf(sbp); +- libxfs_purgebuf(sbp); + + /* + * set XFS-independent status vars from the mount/sb structure +@@ -627,13 +633,32 @@ main(int argc, char **argv) + * to target these for an increase in thread count. Hence a stride value + * of 15 is chosen to ensure we get at least 2 AGs being scanned at once + * on such filesystems. ++ * ++ * Limit the maximum thread count based on the available CPU power that ++ * is available. If we use too many threads, we might run out of memory ++ * and CPU power before we run out of IO concurrency. 
We limit to 8 ++ * threads/CPU as this is enough threads to saturate a CPU on fast ++ * devices, yet few enough that it will saturate but won't overload slow ++ * devices. + */ + if (!ag_stride && glob_agcount >= 16 && do_prefetch) + ag_stride = 15; + + if (ag_stride) { ++ int max_threads = platform_nproc() * 8; ++ + thread_count = (glob_agcount + ag_stride - 1) / ag_stride; +- thread_init(); ++ while (thread_count > max_threads) { ++ ag_stride *= 2; ++ thread_count = (glob_agcount + ag_stride - 1) / ++ ag_stride; ++ } ++ if (thread_count > 0) ++ thread_init(); ++ else { ++ thread_count = 1; ++ ag_stride = 0; ++ } + } + + if (ag_stride && report_interval) { +@@ -895,6 +920,11 @@ _("Note - stripe unit (%d) and width (%d) fields have been reset.\n" + if (verbose) + summary_report(); + do_log(_("done\n")); ++ ++ if (dangerously && !no_modify) ++ do_warn( ++_("Repair of readonly mount complete. Immediate reboot encouraged.\n")); ++ + pftrace_done(); + + return (0); diff --git a/SPECS/xfsprogs.spec b/SPECS/xfsprogs.spec index ae29590..6425f57 100644 --- a/SPECS/xfsprogs.spec +++ b/SPECS/xfsprogs.spec @@ -1,7 +1,7 @@ Summary: Utilities for managing the XFS filesystem Name: xfsprogs Version: 3.2.0 -Release: 0.1.alpha1%{?dist} +Release: 0.10.alpha2%{?dist} # Licensing based on generic "GNU GENERAL PUBLIC LICENSE" # in source, with no mention of version. 
# doc/COPYING file specifies what is GPL and what is LGPL @@ -9,7 +9,7 @@ Release: 0.1.alpha1%{?dist} License: GPL+ and LGPLv2+ Group: System Environment/Base URL: http://oss.sgi.com/projects/xfs/ -Source0: ftp://oss.sgi.com/projects/xfs/cmd_tars/%{name}-%{version}-alpha1.tar.gz +Source0: ftp://oss.sgi.com/projects/xfs/cmd_tars/%{name}-%{version}-alpha2.tar.gz Source1: xfsprogs-wrapper.h BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildRequires: libtool, gettext, libuuid-devel @@ -18,10 +18,12 @@ Provides: xfs-cmds Obsoletes: xfs-cmds <= %{version} Conflicts: xfsdump < 3.0.1 -Patch0: xfsprogs-3.2.0-bigendian.patch -Patch1: xfsprogs-3.2.0-init-mounts-symlinks.patch -Patch2: xfsprogs-3.2.0-test_fd_regular_files.patch -Patch3: xfsprogs-3.2.0-fix-l_sectBBsize.patch +# Bring xfsprogs up to fd799f7 in the upstream repo +Patch0: xfsprogs-diff-since-alpha2.patch +# 2 small patches on list not yet committed +Patch1: xfsprogs-3.2.0-repair-zero-sb.patch +Patch2: xfsprogs-3.2.0-xfs_db-quiet.patch +Patch3: xfsprogs-3.2.0-prefetch-fix.patch %description A set of commands to use the XFS filesystem, including mkfs.xfs. @@ -64,7 +66,7 @@ You should install xfsprogs-qa-devel only if you are interested in building or running the xfstests QA suite. 
%prep -%setup -q -n xfsprogs-3.2.0-alpha1 +%setup -q -n xfsprogs-3.2.0-alpha2 %patch0 -p1 %patch1 -p1 @@ -95,6 +97,10 @@ chmod 0755 $RPM_BUILD_ROOT/%{_libdir}/libhandle.so.*.*.* # remove non-versioned docs location rm -rf $RPM_BUILD_ROOT/%{_datadir}/doc/xfsprogs/ +# xfs_check is deprecated; nuke it from orbit for RHEL7 +rm -f $RPM_BUILD_ROOT/%{_sbindir}/xfs_check +rm -f $RPM_BUILD_ROOT/%{_mandir}/man8/xfs_check* + # ugly hack to allow parallel install of 32-bit and 64-bit -devel packages: %define multilib_arches %{ix86} x86_64 ppc ppc64 s390 s390x %{sparc} @@ -134,6 +140,7 @@ rm -rf $RPM_BUILD_ROOT %{_includedir}/xfs/platform_defs.h %{_includedir}/xfs/xfs.h %{_includedir}/xfs/xfs_fs.h +%{_includedir}/xfs/xfs_types.h %{_includedir}/xfs/xqm.h %{_libdir}/*.so @@ -163,9 +170,9 @@ rm -rf $RPM_BUILD_ROOT %{_includedir}/xfs/xfs_btree_trace.h %{_includedir}/xfs/xfs_cksum.h %{_includedir}/xfs/xfs_da_btree.h +%{_includedir}/xfs/xfs_da_format.h %{_includedir}/xfs/xfs_dinode.h %{_includedir}/xfs/xfs_dir2.h -%{_includedir}/xfs/xfs_dir2_format.h %{_includedir}/xfs/xfs_format.h %{_includedir}/xfs/xfs_ialloc.h %{_includedir}/xfs/xfs_ialloc_btree.h @@ -177,12 +184,48 @@ rm -rf $RPM_BUILD_ROOT %{_includedir}/xfs/xfs_metadump.h %{_includedir}/xfs/xfs_quota_defs.h %{_includedir}/xfs/xfs_sb.h +%{_includedir}/xfs/xfs_shared.h %{_includedir}/xfs/xfs_trace.h %{_includedir}/xfs/xfs_trans_resv.h %{_includedir}/xfs/xfs_trans_space.h -%{_includedir}/xfs/xfs_types.h %changelog +* Tue Mar 11 2014 Eric Sandeen 3.2.0-0.10.alpha2 +- Fix bug in xfs_repair's inode prefetch (#1083820) + +* Tue Mar 11 2014 Eric Sandeen 3.2.0-0.9.alpha2 +- Sync up with upstream's latest CRC enhancements (#1074037) + +* Fri Feb 28 2014 Eric Sandeen 3.2.0-0.8.alpha2 +- mkfs.xfs fix default log size for small filesystems (#1034003) +- xfs_copy: partial fixups for CRC filesystems (#1043570) +- xfs_logprint: Don't error out after split items lose context (#1043591) + +* Tue Feb 24 2014 Eric Sandeen 3.2.0-0.7.alpha2 +- 
xfs_metadump: Really add xfs_metadump -F option (#1040921) +- xfs_check: Remove xfs_check manpage, xfs_check is deprecated (#1029458) + +* Mon Feb 24 2014 Eric Sandeen 3.2.0-0.6.alpha2 +- xfs_metadump: Require -F if proper SB magic is not found (#1040921) +- xfs_repair: fix bad block pointer found in large directories (#1034157) +- libxfs: Don't mark single-map blockmaps as discontiguous (#1033480) +- libxfs: Clear stale buffer errors on write (1033480) + +* Fri Jan 24 2014 Daniel Mach - 3.2.0-0.5.alpha2 +- Mass rebuild 2014-01-24 + +* Fri Dec 27 2013 Daniel Mach - 3.2.0-0.4.alpha2 +- Mass rebuild 2013-12-27 + +* Mon Nov 25 2013 Eric Sandeen 3.2.0-0.3.alpha2 +- New upstream alpha release (#1034445) +- Remove xfs_check reference from fsck.xfs output (#1029455) +- Fix xfs_fsr on some files with selinux attributes (#1034013) + +* Fri Nov 15 2013 Eric Sandeen 3.2.0-0.2.alpha1 +- Move xfs_types.h from xfsprogs-qa-devel to xfsprogs-devel (#1024048) +- Remove deprecated xfs_check from package (#1029458) + * Thu Sep 26 2013 Eric Sandeen 3.2.0-0.1.alpha1 - New upstream alpha release with preliminary CRC support (#1015632) - Additional patches beyon 3.2.0-alpha1: