diff --git a/.gitignore b/.gitignore index 053f354..3f64e93 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/xfsprogs-3.2.0-alpha2.tar.gz +SOURCES/xfsprogs-3.2.1.tar.gz diff --git a/.xfsprogs.metadata b/.xfsprogs.metadata index ccb64e7..2717604 100644 --- a/.xfsprogs.metadata +++ b/.xfsprogs.metadata @@ -1 +1 @@ -7647534fa3b2b2c61dc625d58bfc5671d6895ff4 SOURCES/xfsprogs-3.2.0-alpha2.tar.gz +9926245f0dafc0f19fd698a065b5384d5e305926 SOURCES/xfsprogs-3.2.1.tar.gz diff --git a/SOURCES/xfsprogs-3.2.0-prefetch-fix.patch b/SOURCES/xfsprogs-3.2.0-prefetch-fix.patch deleted file mode 100644 index a3f8e56..0000000 --- a/SOURCES/xfsprogs-3.2.0-prefetch-fix.patch +++ /dev/null @@ -1,71 +0,0 @@ -[PATCH] xfs_repair: fix prefetch queue waiting - -This fixes a regression caused by: - -97b1fcf xfs_repair: fix array overrun in do_inode_prefetch - -The thread creation loop has 2 ways to exit; either via -the loop counter based on thread_count, or the break statement -if we've started enough workers to cover all AGs. - -Whether or not the loop counter "i" reflects the number of -threads started depends on whether or not we exited via the -break. - -The above commit prevented us from indexing off the end -of the queues[] array if we actually advanced "i" all the -way to thread_count, but in the case where we break, "i" -is one *less* than the nr of threads started, so we don't -wait for completion of all threads, and all hell breaks -loose in phase 5. - -Just stop with the cleverness of re-using the loop counter - -instead, explicitly count threads that we start, and then use -that counter to wait for each worker to complete. - -Signed-off-by: Eric Sandeen -Reviewed-by: Brian Foster ---- - -I have one fs which demonstrates the problem, and have verified -the regression & tested the fix against that. 
- -I'll run this over xfstests overnight, but it seems obvious -from here (OTOH the other fix seemed obvious too) - -diff --git a/repair/prefetch.c b/repair/prefetch.c -index e47a48e..4c32395 100644 ---- a/repair/prefetch.c -+++ b/repair/prefetch.c -@@ -944,6 +944,7 @@ do_inode_prefetch( - int i; - struct work_queue queue; - struct work_queue *queues; -+ int queues_started = 0; - - /* - * If the previous phases of repair have not overflowed the buffer -@@ -987,6 +988,7 @@ do_inode_prefetch( - - create_work_queue(&queues[i], mp, 1); - queue_work(&queues[i], prefetch_ag_range_work, 0, wargs); -+ queues_started++; - - if (wargs->end_ag >= mp->m_sb.sb_agcount) - break; -@@ -995,7 +997,7 @@ do_inode_prefetch( - /* - * wait for workers to complete - */ -- while (i--) -+ for (i = 0; i < queues_started; i++) - destroy_work_queue(&queues[i]); - free(queues); - } - -_______________________________________________ -xfs mailing list -xfs@oss.sgi.com -http://oss.sgi.com/mailman/listinfo/xfs - - diff --git a/SOURCES/xfsprogs-3.2.0-repair-zero-sb.patch b/SOURCES/xfsprogs-3.2.0-repair-zero-sb.patch deleted file mode 100644 index 7aa8693..0000000 --- a/SOURCES/xfsprogs-3.2.0-repair-zero-sb.patch +++ /dev/null @@ -1,30 +0,0 @@ -[PATCH] repair: ensure that unused superblock fields are zeroed - -From: Dave Chinner - -When we grab a superblock off disk via get_sb(), we don't know what -the in-memory superblock we are filling out contained. We ned to -ensure that the entire structure is returned in an initialised -state regardless of which fields libxfs_sb_from_disk() populates -from disk. In this case, it doesn't populate the sb_crc field, -and so uninitialised values can escape through to disk on v4 -filesystems because of this. This causes xfs/031 to fail on v4 -filesystems. 
- -Reported-by: Eric Sandeen -Signed-off-by: Dave Chinner ---- - repair/sb.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/repair/sb.c b/repair/sb.c -index b111aca..d928dc0 100644 ---- a/repair/sb.c -+++ b/repair/sb.c -@@ -518,6 +518,7 @@ get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno) - exit(1); - } - memset(buf, 0, size); -+ memset(sbp, 0, sizeof(*sbp)); - - /* try and read it first */ diff --git a/SOURCES/xfsprogs-3.2.0-xfs_db-quiet.patch b/SOURCES/xfsprogs-3.2.0-xfs_db-quiet.patch deleted file mode 100644 index 769c6f8..0000000 --- a/SOURCES/xfsprogs-3.2.0-xfs_db-quiet.patch +++ /dev/null @@ -1,29 +0,0 @@ -[PATCH] xfs_db: hide debug bbmap output - -Most of xfsprogs building with DEBUG enables extra -checks, asserts, etc, but this bunch of printfs was -extra output that's not generally helpful for most -people's runtime experience - and it breaks xfs/290 -with all the noise. - -I assume it's for actual debugging use, and not -generally useful, so bury it a bit deeper under -it's own #ifdef. - -Signed-off-by: Eric Sandeen ---- - -diff --git a/db/io.c b/db/io.c -index 9a787c8..89e6030 100644 ---- a/db/io.c -+++ b/db/io.c -@@ -500,7 +500,7 @@ set_cur( - push_cur(); - - if (bbmap) { --#ifdef DEBUG -+#ifdef DEBUG_BBMAP - int i; - printf(_("xfs_db got a bbmap for %lld\n"), (long long)d); - printf(_("\tblock map")); - diff --git a/SOURCES/xfsprogs-3.2.1-add-supported-file-attributes-to-xfs.5-manpage.patch b/SOURCES/xfsprogs-3.2.1-add-supported-file-attributes-to-xfs.5-manpage.patch new file mode 100644 index 0000000..6c00bfe --- /dev/null +++ b/SOURCES/xfsprogs-3.2.1-add-supported-file-attributes-to-xfs.5-manpage.patch @@ -0,0 +1,57 @@ +commit 794b62f68063d3af74ab7e79a9d7049887ec3ece +Author: Eric Sandeen +Date: Tue Sep 16 09:23:45 2014 +1000 + + xfsprogs: add supported file attributes to xfs.5 manpage + + The chattr(1) manpage suffers from the same problems mount(1) had: + many options listed, not kept up to date for various filesystems. 
+ + I've submitted a manpage update for chattr(1) which says to refer to + filesystem-specific manpages for supported attributes; this patch + updates xfs(5) to list the attributes supported by xfs. + + Signed-off-by: Eric Sandeen + Reviewed-by: Dave Chinner + Signed-off-by: Dave Chinner + +diff --git a/man/man5/xfs.5 b/man/man5/xfs.5 +index 5e47c4c..3214455 100644 +--- a/man/man5/xfs.5 ++++ b/man/man5/xfs.5 +@@ -1,6 +1,6 @@ + .TH xfs 5 + .SH NAME +-xfs \- layout and mount options for the XFS filesystem ++xfs \- layout, mount options, and supported file attributes for the XFS filesystem + .SH DESCRIPTION + An XFS filesystem can reside on a regular disk partition or on a + logical volume. +@@ -302,7 +302,27 @@ namespace is on stable storage. This is useful in HA setups + where failover must not result in clients seeing + inconsistent namespace presentation during or after a + failover event. ++.SH FILE ATTRIBUTES ++The XFS filesystem supports setting the following file ++attributes on Linux systems using the ++.BR chattr (1) ++utility: ++.sp ++.BR a " - append only" ++.sp ++.BR A " - no atime updates" ++.sp ++.BR d " - no dump" ++.sp ++.BR i " - immutable" ++.sp ++.BR S " - synchronous updates" ++.sp ++For descriptions of these attribute flags, please refer to the ++.BR chattr (1) ++man page. 
+ .SH SEE ALSO ++.BR chattr (1), + .BR xfsctl (3), + .BR mount (8), + .BR mkfs.xfs (8), diff --git a/SOURCES/xfsprogs-3.2.1-copy-stripe-geometry.patch b/SOURCES/xfsprogs-3.2.1-copy-stripe-geometry.patch new file mode 100644 index 0000000..46cb250 --- /dev/null +++ b/SOURCES/xfsprogs-3.2.1-copy-stripe-geometry.patch @@ -0,0 +1,92 @@ +From: Eric Sandeen +Date: Wed, 16 Jul 2014 03:52:47 +0000 (+1000) +Subject: repair: copy, don't clear, stripe geometry in backup SB +X-Git-Url: http://oss.sgi.com/cgi-bin/gitweb.cgi?p=xfs%2Fcmds%2Fxfsprogs.git;a=commitdiff_plain;h=6bf4721d47d9755029a7ec944af2832bd115a851 + +repair: copy, don't clear, stripe geometry in backup SB + +Today, if we have a filesystem with stripe geometry and +a damaged primary superblock, we will zero out stripe geometry +if we have copied the backup. + +I'm guessing this might be because changing geometry with mount +options only updates the primary, so backups aren't guaranteed +to be current or correct. + +Unfortunately, that leaves us with sb 0 w/ no geom, and backups +*with* geom, so the next repair finds the mismatch, and complains. +(In other words, the 2nd repair does not come up clean.) +And ... the second repair copies the backup stripe geometry back +into the primary! + +Rather than clearing stripe geometry in this case, just leave it +at what was found in the backup super, and inform the user that this +was done. This leaves a consistent filesystem, and gives the user +a heads-up to double-check the result. + +This can all be demonstrated and tested by running xfs/030 with +geometry set in MKFS_OPTIONS. (To really make the test pass, +we need to filter the warning out of repair output.)
+ +Signed-off-by: Eric Sandeen +Reviewed-by: Brian Foster +Signed-off-by: Dave Chinner +--- + +diff --git a/repair/globals.h b/repair/globals.h +index f6e0a22..6207ca1 100644 +--- a/repair/globals.h ++++ b/repair/globals.h +@@ -124,7 +124,7 @@ EXTERN int lazy_count; /* What to set if to if converting */ + + EXTERN int primary_sb_modified; + EXTERN int bad_ino_btree; +-EXTERN int clear_sunit; ++EXTERN int copied_sunit; + EXTERN int fs_is_dirty; + + /* for hunting down the root inode */ +diff --git a/repair/sb.c b/repair/sb.c +index bc421cc..ad27756 100644 +--- a/repair/sb.c ++++ b/repair/sb.c +@@ -151,7 +151,7 @@ find_secondary_sb(xfs_sb_t *rsb) + */ + memmove(rsb, &bufsb, sizeof(xfs_sb_t)); + rsb->sb_inprogress = 0; +- clear_sunit = 1; ++ copied_sunit = 1; + + if (verify_set_primary_sb(rsb, 0, &dirty) == XR_OK) { + do_warn( +diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c +index 9eb2fa4..834697a 100644 +--- a/repair/xfs_repair.c ++++ b/repair/xfs_repair.c +@@ -193,7 +193,7 @@ process_args(int argc, char **argv) + delete_attr_ok = 1; + force_geo = 0; + assume_xfs = 0; +- clear_sunit = 0; ++ copied_sunit = 0; + sb_inoalignmt = 0; + sb_unit = 0; + sb_width = 0; +@@ -898,13 +898,11 @@ _("Warning: project quota information would be cleared.\n" + dsb->sb_qflags &= cpu_to_be16(~XFS_ALL_QUOTA_CHKD); + } + +- if (clear_sunit) { ++ if (copied_sunit) { + do_warn( +-_("Note - stripe unit (%d) and width (%d) fields have been reset.\n" +- "Please set with mount -o sunit=,swidth=\n"), ++_("Note - stripe unit (%d) and width (%d) were copied from a backup superblock.\n" ++ "Please reset with mount -o sunit=,swidth= if necessary\n"), + be32_to_cpu(dsb->sb_unit), be32_to_cpu(dsb->sb_width)); +- dsb->sb_unit = 0; +- dsb->sb_width = 0; + } + + libxfs_writebuf(sbp, 0); + diff --git a/SOURCES/xfsprogs-3.2.1-libxcmd-make-all-comparisons-using-realpathd-paths.patch b/SOURCES/xfsprogs-3.2.1-libxcmd-make-all-comparisons-using-realpathd-paths.patch new file mode 100644 index 
0000000..019b6bd --- /dev/null +++ b/SOURCES/xfsprogs-3.2.1-libxcmd-make-all-comparisons-using-realpathd-paths.patch @@ -0,0 +1,176 @@ +commit ed350fc6c49155ec398866ebef1d59be02636bce +Author: Eric Sandeen +Date: Wed Jul 16 13:53:47 2014 +1000 + + libxcmd: make all comparisons using realpath'd paths + + Both mountpoints and devices can be symlinks, so given a path + to look for, and mountpoints/devices from the system, use + realpath() on *everything* before making the comparison to see + if our path is a match. + + So, with symlinks for mount points as well as for devices: + + # ls -l /dev/mapper/testvg-lvol0 + lrwxrwxrwx. 1 root root 7 Jul 11 19:24 /dev/mapper/testvg-lvol0 -> ../dm-3 + # ls -l /mnt/scratch2 + lrwxrwxrwx. 1 root root 12 Jul 11 19:57 /mnt/scratch2 -> /mnt/scratch + + this should all work, and does now: + + # xfs_quota -xc "report -h" /mnt/scratch2 + User quota on /mnt/scratch (/dev/mapper/testvg-lvol0) + Blocks + User ID Used Soft Hard Warn/Grace + ---------- --------------------------------- + root 0 0 0 00 [------] + + # xfs_quota -xc "report -h" /mnt/scratch + User quota on /mnt/scratch (/dev/mapper/testvg-lvol0) + Blocks + User ID Used Soft Hard Warn/Grace + ---------- --------------------------------- + root 0 0 0 00 [------] + + # xfs_quota -xc "report -h" /dev/dm-3 + User quota on /mnt/scratch (/dev/mapper/testvg-lvol0) + Blocks + User ID Used Soft Hard Warn/Grace + ---------- --------------------------------- + root 0 0 0 00 [------] + + # xfs_quota -xc "report -h" /dev/mapper/testvg-lvol0 + User quota on /mnt/scratch (/dev/mapper/testvg-lvol0) + Blocks + User ID Used Soft Hard Warn/Grace + ---------- --------------------------------- + root 0 0 0 00 [------] + + The commit: + + 050a7f1 xfsprogs: handle symlinks etc in fs_table_initialise_mounts() + + tried to fix this earlier, but only worked one way; + it compared the argument path in both given and realpath + form to the paths in getmntent, but did not compare to + the realpaths of the 
getmntent devices. + + If we reduce everything, everywhere, to a realpath(), we've + got our best shot at finding the match. + + Signed-off-by: Eric Sandeen + Reviewed-by: Christoph Hellwig + Signed-off-by: Dave Chinner + +diff --git a/libxcmd/paths.c b/libxcmd/paths.c +index 7b0e434..443adbb 100644 +--- a/libxcmd/paths.c ++++ b/libxcmd/paths.c +@@ -269,6 +269,9 @@ out_nomem: + /* + * If *path is NULL, initialize the fs table with all xfs mount points in mtab + * If *path is specified, search for that path in mtab ++ * ++ * Everything - path, devices, and mountpoints - are boiled down to realpath() ++ * for comparison, but fs_table is populated with what comes from getmntent. + */ + static int + fs_table_initialise_mounts( +@@ -278,7 +281,7 @@ fs_table_initialise_mounts( + FILE *mtp; + char *fslog, *fsrt; + int error, found; +- char *rpath = NULL; ++ char rpath[PATH_MAX], rmnt_fsname[PATH_MAX], rmnt_dir[PATH_MAX]; + + error = found = 0; + fslog = fsrt = NULL; +@@ -294,17 +297,20 @@ fs_table_initialise_mounts( + + /* Use realpath to resolve symlinks, relative paths, etc */ + if (path) +- if ((rpath = realpath(path, NULL)) == NULL) +- return ENOENT; ++ if (!realpath(path, rpath)) ++ return errno; + + while ((mnt = getmntent(mtp)) != NULL) { + if (strcmp(mnt->mnt_type, "xfs") != 0) + continue; ++ if (!realpath(mnt->mnt_dir, rmnt_dir)) ++ continue; ++ if (!realpath(mnt->mnt_fsname, rmnt_fsname)) ++ continue; ++ + if (path && +- ((strcmp(path, mnt->mnt_dir) != 0) && +- (strcmp(path, mnt->mnt_fsname) != 0) && +- (strcmp(rpath, mnt->mnt_dir) != 0) && +- (strcmp(rpath, mnt->mnt_fsname) != 0))) ++ ((strcmp(rpath, rmnt_dir) != 0) && ++ (strcmp(rpath, rmnt_fsname) != 0))) + continue; + if (fs_extract_mount_options(mnt, &fslog, &fsrt)) + continue; +@@ -316,7 +322,6 @@ fs_table_initialise_mounts( + } + } + endmntent(mtp); +- free(rpath); + + if (path && !found) + error = ENXIO; +@@ -330,6 +335,9 @@ fs_table_initialise_mounts( + /* + * If *path is NULL, initialize the fs table 
with all xfs mount points in mtab + * If *path is specified, search for that path in mtab ++ * ++ * Everything - path, devices, and mountpoints - are boiled down to realpath() ++ * for comparison, but fs_table is populated with what comes from getmntinfo. + */ + static int + fs_table_initialise_mounts( +@@ -337,7 +345,7 @@ fs_table_initialise_mounts( + { + struct statfs *stats; + int i, count, error, found; +- char *rpath = NULL; ++ char rpath[PATH_MAX], rmntfromname[PATH_MAX], rmntonname[PATH_MAX]; + + error = found = 0; + if ((count = getmntinfo(&stats, 0)) < 0) { +@@ -348,17 +356,20 @@ fs_table_initialise_mounts( + + /* Use realpath to resolve symlinks, relative paths, etc */ + if (path) +- if ((rpath = realpath(path, NULL)) == NULL) +- return ENOENT; ++ if (!realpath(path, rpath)) ++ return errno; + + for (i = 0; i < count; i++) { + if (strcmp(stats[i].f_fstypename, "xfs") != 0) + continue; ++ if (!realpath(stats[i].f_mntfromname, rmntfromname)) ++ continue; ++ if (!realpath(stats[i].f_mntonname, rmntonname)) ++ continue; ++ + if (path && +- ((strcmp(path, stats[i].f_mntonname) != 0) && +- (strcmp(path, stats[i].f_mntfromname) != 0) && +- (strcmp(rpath, stats[i].f_mntonname) != 0) && +- (strcmp(rpath, stats[i].f_mntfromname) != 0))) ++ ((strcmp(rpath, rmntonname) != 0) && ++ (strcmp(rpath, rmntfromname) != 0))) + continue; + /* TODO: external log and realtime device?
*/ + (void) fs_table_insert(stats[i].f_mntonname, 0, +@@ -369,7 +380,6 @@ fs_table_initialise_mounts( + break; + } + } +- free(rpath); + if (path && !found) + error = ENXIO; + diff --git a/SOURCES/xfsprogs-3.2.1-quota-fix-NULL-pointer-dereference-in-report_f.patch b/SOURCES/xfsprogs-3.2.1-quota-fix-NULL-pointer-dereference-in-report_f.patch new file mode 100644 index 0000000..b1cc75d --- /dev/null +++ b/SOURCES/xfsprogs-3.2.1-quota-fix-NULL-pointer-dereference-in-report_f.patch @@ -0,0 +1,40 @@ +commit a14d40939de7d38029f99c10bc237bb68e83d119 +Author: Jie Liu +Date: Wed Jul 16 13:54:47 2014 +1000 + + quota: fix NULL pointer dereference in report_f + + Running xfs_quota report against an invalid XFS path, without the desired quota + limitation enabled, will hit SIGSEGV as fs_path is uninitialized, e.g. + + # xfs_quota -xc 'report -up' /invalid_path + xfs_quota: cannot setup path for mount /invalid_path: No such file or directory + Segmentation fault (core dumped) + + (gdb) r -xc 'report -up' /invalid_path + xfs_quota: cannot setup path for mount /invalid_path: No such file or directory + + Program received signal SIGSEGV, Segmentation fault. + 0x0000000000408b4d in report_f (argc=2, argv=0x105ea70) at report.c:627 + 627 else if (fs_path->fs_flags & FS_MOUNT_POINT) + + This patch fixes report_f() to only do report if the fs_path is initialized.
+ + Signed-off-by: Jie Liu + Reviewed-by: Eric Sandeen + Reviewed-by: Christoph Hellwig + Signed-off-by: Dave Chinner + +diff --git a/quota/report.c b/quota/report.c +index 70894a2..8e3316e 100644 +--- a/quota/report.c ++++ b/quota/report.c +@@ -624,7 +624,7 @@ report_f( + if (flags & ALL_MOUNTS_FLAG) + report_any_type(fp, form, type, NULL, + lower, upper, flags); +- else if (fs_path->fs_flags & FS_MOUNT_POINT) ++ else if (fs_path && (fs_path->fs_flags & FS_MOUNT_POINT)) + report_any_type(fp, form, type, fs_path->fs_dir, + lower, upper, flags); + } else while (argc > optind) { diff --git a/SOURCES/xfsprogs-3.2.1-xfs_copy-simplify-first_agbno-calculation.patch b/SOURCES/xfsprogs-3.2.1-xfs_copy-simplify-first_agbno-calculation.patch new file mode 100644 index 0000000..a7063ae --- /dev/null +++ b/SOURCES/xfsprogs-3.2.1-xfs_copy-simplify-first_agbno-calculation.patch @@ -0,0 +1,75 @@ +commit 263b53767a3df33f392262f539bfb35ec578f5e5 +Author: Eric Sandeen +Date: Thu Nov 13 10:02:22 2014 +1100 + + xfs_copy: simplify first_agbno calculation + + After ffe9a9a xfsprogs: xfs_copy: fix data corruption of target, + xfs_copy started hitting an ASSERT for a 4k sector / 4k blocksize + filesystem: + + # dd if=/dev/zero of=test.img bs=1M count=1024 + # mkfs.xfs -s size=4096 test.img + # xfs_copy test.img xfs.img + xfs_copy: xfs_copy.c:720: main: Assertion `((((((xfs_daddr_t)(3 << (mp)->m_sectbb_log)) + 1) * (1<<9)) + first_residue) % source_blocksize) == 0' failed. + Aborted + + I started digging through all the calculations below, and realized + that in the end, all it wants is the first filesystem block after + the AG header. XFS_AGFL_BLOCK(mp) + 1 suffices for this purpose; + rip out the rest which seems overly complex and apparently bug-prone. + + I tested this by creating a 4g filesystem with combinations of + sector & block size between 512 and 4k, copying in /lib/modules, + running an xfs_copy of that, and running repair against the copy; + it all looks good. 
It took a long time, but I will create a + simpler/shorter xfstest based on this. + + Reported-by: Zorro Lang + Signed-off-by: Eric Sandeen + Reviewed-by: Brian Foster + Signed-off-by: Dave Chinner + +diff --git a/copy/xfs_copy.c b/copy/xfs_copy.c +index 7ce5ec9..279527c 100644 +--- a/copy/xfs_copy.c ++++ b/copy/xfs_copy.c +@@ -475,7 +475,7 @@ main(int argc, char **argv) + int open_flags; + xfs_off_t pos, end_pos; + size_t length; +- int c, first_residue, tmp_residue; ++ int c; + __uint64_t size, sizeb; + __uint64_t numblocks = 0; + int wblocks = 0; +@@ -697,27 +697,13 @@ main(int argc, char **argv) + ASSERT(source_blocksize % source_sectorsize == 0); + ASSERT(source_sectorsize % BBSIZE == 0); + +- if (source_blocksize > source_sectorsize) { +- /* get number of leftover sectors in last block of ag header */ +- +- tmp_residue = ((XFS_AGFL_DADDR(mp) + 1) * BBSIZE) +- % source_blocksize; +- first_residue = (tmp_residue == 0) ? 0 : +- source_blocksize - tmp_residue; +- ASSERT(first_residue % source_sectorsize == 0); +- } else if (source_blocksize == source_sectorsize) { +- first_residue = 0; +- } else { ++ if (source_blocksize < source_sectorsize) { + do_log(_("Error: filesystem block size is smaller than the" + " disk sectorsize.\nAborting XFS copy now.\n")); + exit(1); + } + +- first_agbno = (((XFS_AGFL_DADDR(mp) + 1) * BBSIZE) +- + first_residue) / source_blocksize; +- ASSERT(first_agbno != 0); +- ASSERT(((((XFS_AGFL_DADDR(mp) + 1) * BBSIZE) +- + first_residue) % source_blocksize) == 0); ++ first_agbno = XFS_AGFL_BLOCK(mp) + 1; + + /* now open targets */ + diff --git a/SOURCES/xfsprogs-3.2.1-xfs_quota-manpage.patch b/SOURCES/xfsprogs-3.2.1-xfs_quota-manpage.patch new file mode 100644 index 0000000..26163ca --- /dev/null +++ b/SOURCES/xfsprogs-3.2.1-xfs_quota-manpage.patch @@ -0,0 +1,45 @@ +Two patches on upstream list: + +[PATCH] xfs_quota: fix typo in manpage + +and + +[PATCH] xfs_quota: man page fix - project command requires arguments + +The xfs_quota man page 
states that the "project" command without +arguments will list all project names and identifiers, but it has +never done this; the project_f command has always been defined as +requiring at least one argument. + +Fix the man page to reflect reality. + +Signed-off-by: Eric Sandeen +--- + +diff --git a/man/man8/xfs_quota.8 b/man/man8/xfs_quota.8 +index 8cc8ab7..3ca2fa5 100644 +--- a/man/man8/xfs_quota.8 ++++ b/man/man8/xfs_quota.8 +@@ -324,7 +324,7 @@ path to the + list entry (the current path is used by many + of the commands described here, it identifies the filesystem toward + which a command is directed). +-The patch list can come from several places \- the command line, ++The path list can come from several places \- the command line, + the mount table, and the + .I /etc/projects + file. +@@ -565,12 +565,7 @@ instead of stdout. + .I name + ] + .br +-Without arguments, this command lists known project names and identifiers +-(based on entries in the +-.I /etc/projects +-and +-.I /etc/projid +-files). The ++The + .BR \-c , + .BR \-C , + and diff --git a/SOURCES/xfsprogs-3.2.1-xfs_repair-fix-max-block-offset-test.patch b/SOURCES/xfsprogs-3.2.1-xfs_repair-fix-max-block-offset-test.patch new file mode 100644 index 0000000..732730f --- /dev/null +++ b/SOURCES/xfsprogs-3.2.1-xfs_repair-fix-max-block-offset-test.patch @@ -0,0 +1,54 @@ +[PATCH V2] xfs_repair: fix max block offset test + +Eryu pointed out that in fstest xfs/071, we find corruption +reported at the end. This test attempts to do IO at the +maximum possible offsets, and repair yields: + +inode 1027 - extent offset too large - start 70, count 1, offset 2251799813685247 +correcting nextents for inode 1027 +bad data fork in inode 1027 +would have cleared inode 1027 + +Repair is complaining that an extent *starts* at the maximum +block, but AFAICT, starting there is just fine, as long as +we also end there. i.e. a one-block extent at the limit +is just fine. 
+ +So change the xfs_repair test to allow this situation. + +Also, the warning text is a bit unclear, mixing in the physical +block w/ the logical block... rearrange that a little to make +it obvious. + +Reported-by: Eryu Guan +Signed-off-by: Eric Sandeen +Reviewed-by: Brian Foster +--- + +V2: Update the warning text + +diff --git a/repair/dinode.c b/repair/dinode.c +index 38a6562..59824ec 100644 +--- a/repair/dinode.c ++++ b/repair/dinode.c +@@ -667,12 +667,14 @@ _("inode %" PRIu64 " - bad extent overflows - start %" PRIu64 ", " + irec.br_startoff); + goto done; + } +- if (irec.br_startoff >= fs_max_file_offset) { ++ /* Ensure this extent does not extend beyond the max offset */ ++ if (irec.br_startoff + irec.br_blockcount - 1 > ++ fs_max_file_offset) { + do_warn( +-_("inode %" PRIu64 " - extent offset too large - start %" PRIu64 ", " +- "count %" PRIu64 ", offset %" PRIu64 "\n"), +- ino, irec.br_startblock, irec.br_blockcount, +- irec.br_startoff); ++_("inode %" PRIu64 " - extent exceeds max offset - start %" PRIu64 ", " ++ "count %" PRIu64 ", physical block %" PRIu64 "\n"), ++ ino, irec.br_startoff, irec.br_blockcount, ++ irec.br_startblock); + goto done; + } + + diff --git a/SOURCES/xfsprogs-diff-since-alpha2.patch b/SOURCES/xfsprogs-diff-since-alpha2.patch deleted file mode 100644 index 85aef6f..0000000 --- a/SOURCES/xfsprogs-diff-since-alpha2.patch +++ /dev/null @@ -1,5337 +0,0 @@ -diff --git a/copy/xfs_copy.c b/copy/xfs_copy.c -index 9986fbf..9f2f99d 100644 ---- a/copy/xfs_copy.c -+++ b/copy/xfs_copy.c -@@ -684,6 +684,16 @@ main(int argc, char **argv) - sb = &mbuf.m_sb; - libxfs_sb_from_disk(sb, XFS_BUF_TO_SBP(sbp)); - -+ /* -+ * For now, V5 superblock filesystems are not supported without -d; -+ * we do not have the infrastructure yet to fix CRCs when a new UUID -+ * is generated. 
-+ */ -+ if (xfs_sb_version_hascrc(sb) && !duplicate) { -+ do_log(_("%s: Cannot yet copy V5 fs without '-d'\n"), progname); -+ exit(1); -+ } -+ - mp = libxfs_mount(&mbuf, sb, xargs.ddev, xargs.logdev, xargs.rtdev, 0); - if (mp == NULL) { - do_log(_("%s: %s filesystem failed to initialize\n" -@@ -957,7 +967,13 @@ main(int argc, char **argv) - ((char *)btree_buf.data + - pos - btree_buf.position); - -- ASSERT(be32_to_cpu(block->bb_magic) == XFS_ABTB_MAGIC); -+ if (be32_to_cpu(block->bb_magic) != -+ (xfs_sb_version_hascrc(&mp->m_sb) ? -+ XFS_ABTB_CRC_MAGIC : XFS_ABTB_MAGIC)) { -+ do_log(_("Bad btree magic 0x%x\n"), -+ be32_to_cpu(block->bb_magic)); -+ exit(1); -+ } - - if (be16_to_cpu(block->bb_level) == 0) - break; -diff --git a/db/attr.c b/db/attr.c -index 740d564..caa154e 100644 ---- a/db/attr.c -+++ b/db/attr.c -@@ -170,7 +170,7 @@ attr3_leaf_entries_count( - struct xfs_attr3_leafblock *leaf = obj; - - ASSERT(startoff == 0); -- if (be16_to_cpu(leaf->hdr.info.hdr.magic) != XFS_ATTR_LEAF_MAGIC) -+ if (be16_to_cpu(leaf->hdr.info.hdr.magic) != XFS_ATTR3_LEAF_MAGIC) - return 0; - return be16_to_cpu(leaf->hdr.count); - } -diff --git a/db/bit.c b/db/bit.c -index ca57d31..e8adab3 100644 ---- a/db/bit.c -+++ b/db/bit.c -@@ -128,57 +128,41 @@ getbitval( - return rval; - } - -+/* -+ * The input data can be 8, 16, 32, and 64 sized numeric values -+ * aligned on a byte boundry, or odd sized numbers stored on odd -+ * aligned offset (for example the bmbt fields). -+ * -+ * The input data sent to this routine has been converted to big endian -+ * and has been adjusted in the array so that the first input bit is to -+ * be written in the first bit in the output. -+ * -+ * If the field length and the output buffer are byte aligned, then use -+ * memcpy from the input to the output, but if either entries are not byte -+ * aligned, then loop over the entire bit range reading the input value -+ * and set/clear the matching bit in the output. 
-+ * -+ * example when ibuf is not multiple of a byte in length: -+ * -+ * ibuf: | BBBBBBBB | bbbxxxxx | -+ * \\\\\\\\--\\\\ -+ * obuf+bitoff: | xBBBBBBB | Bbbbxxxx | -+ * -+ */ - void - setbitval( -- void *obuf, /* buffer to write into */ -- int bitoff, /* bit offset of where to write */ -- int nbits, /* number of bits to write */ -- void *ibuf) /* source bits */ -+ void *obuf, /* start of buffer to write into */ -+ int bitoff, /* bit offset into the output buffer */ -+ int nbits, /* number of bits to write */ -+ void *ibuf) /* source bits */ - { -- char *in = (char *)ibuf; -- char *out = (char *)obuf; -- -- int bit; -- --#if BYTE_ORDER == LITTLE_ENDIAN -- int big = 0; --#else -- int big = 1; --#endif -- -- /* only need to swap LE integers */ -- if (big || (nbits!=16 && nbits!=32 && nbits!=64) ) { -- /* We don't have type info, so we can only assume -- * that 2,4 & 8 byte values are integers. sigh. -- */ -- -- /* byte aligned ? */ -- if (bitoff%NBBY) { -- /* no - bit copy */ -- for (bit=0; bitsb_magicnum != XFS_SB_MAGIC) { - fprintf(stderr, _("%s: %s is not a valid XFS filesystem (unexpected SB magic number 0x%08x)\n"), - progname, fsdevice, sbp->sb_magicnum); -- if (!force) -+ if (!force) { -+ fprintf(stderr, _("Use -F to force a read attempt.\n")); - exit(EXIT_FAILURE); -+ } - } - - mp = libxfs_mount(&xmount, sbp, x.ddev, x.logdev, x.rtdev, -diff --git a/db/io.c b/db/io.c -index 123214d..9a787c8 100644 ---- a/db/io.c -+++ b/db/io.c -@@ -449,9 +449,7 @@ write_cur_bbs(void) - - - /* re-read buffer from disk */ -- ret = libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp, -- iocur_top->bbmap->b, iocur_top->bbmap->nmaps, -- 0); -+ ret = libxfs_readbufr_map(mp->m_ddev_targp, iocur_top->bp, 0); - if (ret != 0) - dbprintf(_("read error: %s\n"), strerror(ret)); - } -@@ -523,10 +521,11 @@ set_cur( - } - - /* -- * keep the buffer even if the verifier says it is corrupted. -+ * Keep the buffer even if the verifier says it is corrupted. 
- * We're a diagnostic tool, after all. - */ -- if (!bp || (bp->b_error && bp->b_error != EFSCORRUPTED)) -+ if (!bp || (bp->b_error && bp->b_error != EFSCORRUPTED && -+ bp->b_error != EFSBADCRC)) - return; - iocur_top->buf = bp->b_addr; - iocur_top->bp = bp; -diff --git a/db/io.h b/db/io.h -index 4f24c83..ad39bee 100644 ---- a/db/io.h -+++ b/db/io.h -@@ -41,6 +41,7 @@ typedef struct iocur { - int ino_crc_ok:1; - int ino_buf:1; - int dquot_buf:1; -+ int need_crc:1; - } iocur_t; - - #define DB_RING_ADD 1 /* add to ring on set_cur */ -@@ -66,6 +67,6 @@ static inline bool - iocur_crc_valid() - { - return (iocur_top->bp && -- iocur_top->bp->b_error != EFSCORRUPTED && -+ iocur_top->bp->b_error != EFSBADCRC && - (!iocur_top->ino_buf || iocur_top->ino_crc_ok)); - } -diff --git a/db/metadump.c b/db/metadump.c -index 117dc42..09bb85a 100644 ---- a/db/metadump.c -+++ b/db/metadump.c -@@ -145,6 +145,8 @@ print_progress(const char *fmt, ...) - * even if the dump is exactly aligned, the last index will be full of - * zeros. If the last index entry is non-zero, the dump is incomplete. - * Correspondingly, the last chunk will have a count < num_indicies. -+ * -+ * Return 0 for success, -1 for failure. - */ - - static int -@@ -156,49 +158,88 @@ write_index(void) - metablock->mb_count = cpu_to_be16(cur_index); - if (fwrite(metablock, (cur_index + 1) << BBSHIFT, 1, outf) != 1) { - print_warning("error writing to file: %s", strerror(errno)); -- return 0; -+ return -errno; - } - - memset(block_index, 0, num_indicies * sizeof(__be64)); - cur_index = 0; -- return 1; -+ return 0; -+} -+ -+/* -+ * Return 0 for success, -errno for failure. 
-+ */ -+static int -+write_buf_segment( -+ char *data, -+ __int64_t off, -+ int len) -+{ -+ int i; -+ int ret; -+ -+ for (i = 0; i < len; i++, off++, data += BBSIZE) { -+ block_index[cur_index] = cpu_to_be64(off); -+ memcpy(&block_buffer[cur_index << BBSHIFT], data, BBSIZE); -+ if (++cur_index == num_indicies) { -+ ret = write_index(); -+ if (ret) -+ return -EIO; -+ } -+ } -+ return 0; - } - -+/* -+ * we want to preserve the state of the metadata in the dump - whether it is -+ * intact or corrupt, so even if the buffer has a verifier attached to it we -+ * don't want to run it prior to writing the buffer to the metadump image. -+ * -+ * The only reason for running the verifier is to recalculate the CRCs on a -+ * buffer that has been obfuscated. i.e. a buffer than metadump modified itself. -+ * In this case, we only run the verifier if the buffer was not corrupt to begin -+ * with so that we don't accidentally correct buffers with CRC or errors in them -+ * when we are obfuscating them. -+ */ - static int - write_buf( - iocur_t *buf) - { -- char *data; -- __int64_t off; -+ struct xfs_buf *bp = buf->bp; - int i; -+ int ret; - - /* - * Run the write verifier to recalculate the buffer CRCs and check -- * we are writing something valid to disk -+ * metadump didn't introduce a new corruption. Warn if the verifier -+ * failed, but still continue to dump it into the output file. 
- */ -- if (buf->bp && buf->bp->b_ops) { -- buf->bp->b_error = 0; -- buf->bp->b_ops->verify_write(buf->bp); -- if (buf->bp->b_error) { -- fprintf(stderr, -- _("%s: write verifer failed on bno 0x%llx/0x%x\n"), -- __func__, (long long)buf->bp->b_bn, -- buf->bp->b_bcount); -- return buf->bp->b_error; -+ if (buf->need_crc && bp && bp->b_ops && !bp->b_error) { -+ bp->b_ops->verify_write(bp); -+ if (bp->b_error) { -+ print_warning( -+ "obfuscation corrupted block at bno 0x%llx/0x%x", -+ (long long)bp->b_bn, bp->b_bcount); - } - } - -- for (i = 0, off = buf->bb, data = buf->data; -- i < buf->blen; -- i++, off++, data += BBSIZE) { -- block_index[cur_index] = cpu_to_be64(off); -- memcpy(&block_buffer[cur_index << BBSHIFT], data, BBSIZE); -- if (++cur_index == num_indicies) { -- if (!write_index()) -- return 0; -+ /* handle discontiguous buffers */ -+ if (!buf->bbmap) { -+ ret = write_buf_segment(buf->data, buf->bb, buf->blen); -+ if (ret) -+ return ret; -+ } else { -+ int len = 0; -+ for (i = 0; i < buf->bbmap->nmaps; i++) { -+ ret = write_buf_segment(buf->data + BBTOB(len), -+ buf->bbmap->b[i].bm_bn, -+ buf->bbmap->b[i].bm_len); -+ if (ret) -+ return ret; -+ len += buf->bbmap->b[i].bm_len; - } - } -- return !seenint(); -+ return seenint() ? -EINTR : 0; - } - - -@@ -227,7 +268,7 @@ scan_btree( - rval = !stop_on_read_error; - goto pop_out; - } -- if (!write_buf(iocur_top)) -+ if (write_buf(iocur_top)) - goto pop_out; - - if (!(*func)(iocur_top->data, agno, agbno, level - 1, btype, arg)) -@@ -974,16 +1015,23 @@ obfuscate_sf_dir( - } - } - -+/* -+ * The pathname may not be null terminated. It may be terminated by the end of -+ * a buffer or inode literal area, and the start of the next region contains -+ * unknown data. Therefore, when we get to the last component of the symlink, we -+ * cannot assume that strlen() will give us the right result. Hence we need to -+ * track the remaining pathname length and use that instead. 
-+ */ - static void - obfuscate_path_components( - char *buf, - __uint64_t len) - { -- uchar_t *comp; -+ uchar_t *comp = (uchar_t *)buf; -+ uchar_t *end = comp + len; - xfs_dahash_t hash; - -- comp = (uchar_t *)buf; -- while (comp < (uchar_t *)buf + len) { -+ while (comp < end) { - char *slash; - int namelen; - -@@ -991,7 +1039,7 @@ obfuscate_path_components( - slash = strchr((char *)comp, '/'); - if (!slash) { - /* last (or single) component */ -- namelen = strlen((char *)comp); -+ namelen = strnlen((char *)comp, len); - hash = libxfs_da_hashname(comp, namelen); - obfuscate_name(hash, namelen, comp); - break; -@@ -1000,11 +1048,13 @@ obfuscate_path_components( - /* handle leading or consecutive slashes */ - if (!namelen) { - comp++; -+ len--; - continue; - } - hash = libxfs_da_hashname(comp, namelen); - obfuscate_name(hash, namelen, comp); - comp += namelen + 1; -+ len -= namelen + 1; - } - } - -@@ -1080,24 +1130,11 @@ obfuscate_sf_attr( - } - } - --/* -- * dir_data structure is used to track multi-fsblock dir2 blocks between extent -- * processing calls. -- */ -- --static struct dir_data_s { -- int end_of_data; -- int block_index; -- int offset_to_entry; -- int bad_block; --} dir_data; -- - static void --obfuscate_dir_data_blocks( -- char *block, -- xfs_dfiloff_t offset, -- xfs_dfilblks_t count, -- int is_block_format) -+obfuscate_dir_data_block( -+ char *block, -+ xfs_dfiloff_t offset, -+ int is_block_format) - { - /* - * we have to rely on the fileoffset and signature of the block to -@@ -1105,133 +1142,105 @@ obfuscate_dir_data_blocks( - * for multi-fsblock dir blocks, if a name crosses an extent boundary, - * ignore it and continue. 
- */ -- int c; -- int dir_offset; -- char *ptr; -- char *endptr; -- -- if (is_block_format && count != mp->m_dirblkfsbs) -- return; /* too complex to handle this rare case */ -- -- for (c = 0, endptr = block; c < count; c++) { -- -- if (dir_data.block_index == 0) { -- int wantmagic; -- struct xfs_dir2_data_hdr *datahdr; -- -- datahdr = (struct xfs_dir2_data_hdr *)block; -- -- if (offset % mp->m_dirblkfsbs != 0) -- return; /* corrupted, leave it alone */ -- -- dir_data.bad_block = 0; -- -- if (is_block_format) { -- xfs_dir2_leaf_entry_t *blp; -- xfs_dir2_block_tail_t *btp; -- -- btp = xfs_dir2_block_tail_p(mp, datahdr); -- blp = xfs_dir2_block_leaf_p(btp); -- if ((char *)blp > (char *)btp) -- blp = (xfs_dir2_leaf_entry_t *)btp; -- -- dir_data.end_of_data = (char *)blp - block; -- wantmagic = XFS_DIR2_BLOCK_MAGIC; -- } else { /* leaf/node format */ -- dir_data.end_of_data = mp->m_dirblkfsbs << -- mp->m_sb.sb_blocklog; -- wantmagic = XFS_DIR2_DATA_MAGIC; -- } -- dir_data.offset_to_entry = -- xfs_dir3_data_entry_offset(datahdr); -+ int dir_offset; -+ char *ptr; -+ char *endptr; -+ int end_of_data; -+ int wantmagic; -+ struct xfs_dir2_data_hdr *datahdr; -+ -+ datahdr = (struct xfs_dir2_data_hdr *)block; -+ -+ if (offset % mp->m_dirblkfsbs != 0) -+ return; /* corrupted, leave it alone */ -+ -+ if (is_block_format) { -+ xfs_dir2_leaf_entry_t *blp; -+ xfs_dir2_block_tail_t *btp; -+ -+ btp = xfs_dir2_block_tail_p(mp, datahdr); -+ blp = xfs_dir2_block_leaf_p(btp); -+ if ((char *)blp > (char *)btp) -+ blp = (xfs_dir2_leaf_entry_t *)btp; -+ -+ end_of_data = (char *)blp - block; -+ if (xfs_sb_version_hascrc(&mp->m_sb)) -+ wantmagic = XFS_DIR3_BLOCK_MAGIC; -+ else -+ wantmagic = XFS_DIR2_BLOCK_MAGIC; -+ } else { /* leaf/node format */ -+ end_of_data = mp->m_dirblkfsbs << mp->m_sb.sb_blocklog; -+ if (xfs_sb_version_hascrc(&mp->m_sb)) -+ wantmagic = XFS_DIR3_DATA_MAGIC; -+ else -+ wantmagic = XFS_DIR2_DATA_MAGIC; -+ } - -- if (be32_to_cpu(datahdr->magic) != wantmagic) { -- if 
(show_warnings) -- print_warning("invalid magic in dir " -- "inode %llu block %ld", -- (long long)cur_ino, -- (long)offset); -- dir_data.bad_block = 1; -- } -- } -- dir_data.block_index++; -- if (dir_data.block_index == mp->m_dirblkfsbs) -- dir_data.block_index = 0; -+ if (be32_to_cpu(datahdr->magic) != wantmagic) { -+ if (show_warnings) -+ print_warning( -+ "invalid magic in dir inode %llu block %ld", -+ (long long)cur_ino, (long)offset); -+ return; -+ } - -- if (dir_data.bad_block) -- continue; -+ dir_offset = xfs_dir3_data_entry_offset(datahdr); -+ ptr = block + dir_offset; -+ endptr = block + mp->m_sb.sb_blocksize; - -- dir_offset = (dir_data.block_index << mp->m_sb.sb_blocklog) + -- dir_data.offset_to_entry; -- -- ptr = endptr + dir_data.offset_to_entry; -- endptr += mp->m_sb.sb_blocksize; -- -- while (ptr < endptr && dir_offset < dir_data.end_of_data) { -- xfs_dir2_data_entry_t *dep; -- xfs_dir2_data_unused_t *dup; -- int length; -- -- dup = (xfs_dir2_data_unused_t *)ptr; -- -- if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { -- int length = be16_to_cpu(dup->length); -- if (dir_offset + length > dir_data.end_of_data || -- length == 0 || (length & -- (XFS_DIR2_DATA_ALIGN - 1))) { -- if (show_warnings) -- print_warning("invalid length " -- "for dir free space in " -- "inode %llu", -- (long long)cur_ino); -- dir_data.bad_block = 1; -- break; -- } -- if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != -- dir_offset) { -- dir_data.bad_block = 1; -- break; -- } -- dir_offset += length; -- ptr += length; -- if (dir_offset >= dir_data.end_of_data || -- ptr >= endptr) -- break; -- } -+ while (ptr < endptr && dir_offset < end_of_data) { -+ xfs_dir2_data_entry_t *dep; -+ xfs_dir2_data_unused_t *dup; -+ int length; - -- dep = (xfs_dir2_data_entry_t *)ptr; -- length = xfs_dir3_data_entsize(mp, dep->namelen); -+ dup = (xfs_dir2_data_unused_t *)ptr; - -- if (dir_offset + length > dir_data.end_of_data || -- ptr + length > endptr) { -+ if (be16_to_cpu(dup->freetag) 
== XFS_DIR2_DATA_FREE_TAG) { -+ int length = be16_to_cpu(dup->length); -+ if (dir_offset + length > end_of_data || -+ !length || (length & (XFS_DIR2_DATA_ALIGN - 1))) { - if (show_warnings) -- print_warning("invalid length for " -- "dir entry name in inode %llu", -+ print_warning( -+ "invalid length for dir free space in inode %llu", - (long long)cur_ino); -- break; -+ return; - } -- if (be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) != -- dir_offset) { -- dir_data.bad_block = 1; -- break; -- } -- generate_obfuscated_name(be64_to_cpu(dep->inumber), -- dep->namelen, &dep->name[0]); -+ if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != -+ dir_offset) -+ return; - dir_offset += length; - ptr += length; -+ if (dir_offset >= end_of_data || ptr >= endptr) -+ return; -+ } -+ -+ dep = (xfs_dir2_data_entry_t *)ptr; -+ length = xfs_dir3_data_entsize(mp, dep->namelen); -+ -+ if (dir_offset + length > end_of_data || -+ ptr + length > endptr) { -+ if (show_warnings) -+ print_warning( -+ "invalid length for dir entry name in inode %llu", -+ (long long)cur_ino); -+ return; - } -- dir_data.offset_to_entry = dir_offset & -- (mp->m_sb.sb_blocksize - 1); -+ if (be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) != -+ dir_offset) -+ return; -+ generate_obfuscated_name(be64_to_cpu(dep->inumber), -+ dep->namelen, &dep->name[0]); -+ dir_offset += length; -+ ptr += length; - } - } - - static void --obfuscate_symlink_blocks( -- char *block, -- xfs_dfilblks_t count) -+obfuscate_symlink_block( -+ char *block) - { -- count <<= mp->m_sb.sb_blocklog; -- obfuscate_path_components(block, count); -+ /* XXX: need to handle CRC headers */ -+ obfuscate_path_components(block, mp->m_sb.sb_blocksize); - } - - #define MAX_REMOTE_VALS 4095 -@@ -1252,86 +1261,227 @@ add_remote_vals( - blockidx++; - length -= XFS_LBSIZE(mp); - } -+ -+ if (attr_data.remote_val_count >= MAX_REMOTE_VALS) { -+ print_warning( -+"Overflowed attr obfuscation array. 
No longer obfuscating remote attrs."); -+ } - } - - static void --obfuscate_attr_blocks( -+obfuscate_attr_block( - char *block, -- xfs_dfiloff_t offset, -- xfs_dfilblks_t count) -+ xfs_dfiloff_t offset) - { - xfs_attr_leafblock_t *leaf; -- int c; - int i; - int nentries; - xfs_attr_leaf_entry_t *entry; - xfs_attr_leaf_name_local_t *local; - xfs_attr_leaf_name_remote_t *remote; - -- for (c = 0; c < count; c++, offset++, block += XFS_LBSIZE(mp)) { -+ leaf = (xfs_attr_leafblock_t *)block; - -- leaf = (xfs_attr_leafblock_t *)block; -- -- if (be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC) { -- for (i = 0; i < attr_data.remote_val_count; i++) { -- if (attr_data.remote_vals[i] == offset) -- memset(block, 0, XFS_LBSIZE(mp)); -- } -- continue; -+ if (be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC) { -+ for (i = 0; i < attr_data.remote_val_count; i++) { -+ /* XXX: need to handle CRC headers */ -+ if (attr_data.remote_vals[i] == offset) -+ memset(block, 0, XFS_LBSIZE(mp)); - } -+ return; -+ } - -- nentries = be16_to_cpu(leaf->hdr.count); -- if (nentries * sizeof(xfs_attr_leaf_entry_t) + -- sizeof(xfs_attr_leaf_hdr_t) > XFS_LBSIZE(mp)) { -+ nentries = be16_to_cpu(leaf->hdr.count); -+ if (nentries * sizeof(xfs_attr_leaf_entry_t) + -+ sizeof(xfs_attr_leaf_hdr_t) > XFS_LBSIZE(mp)) { -+ if (show_warnings) -+ print_warning("invalid attr count in inode %llu", -+ (long long)cur_ino); -+ return; -+ } -+ -+ for (i = 0, entry = &leaf->entries[0]; i < nentries; i++, entry++) { -+ if (be16_to_cpu(entry->nameidx) > XFS_LBSIZE(mp)) { - if (show_warnings) -- print_warning("invalid attr count in inode %llu", -+ print_warning( -+ "invalid attr nameidx in inode %llu", - (long long)cur_ino); -- continue; -+ break; - } -- -- for (i = 0, entry = &leaf->entries[0]; i < nentries; -- i++, entry++) { -- if (be16_to_cpu(entry->nameidx) > XFS_LBSIZE(mp)) { -+ if (entry->flags & XFS_ATTR_LOCAL) { -+ local = xfs_attr3_leaf_name_local(leaf, i); -+ if (local->namelen == 0) { - if 
(show_warnings) -- print_warning("invalid attr nameidx " -- "in inode %llu", -- (long long)cur_ino); -+ print_warning( -+ "zero length for attr name in inode %llu", -+ (long long)cur_ino); - break; - } -- if (entry->flags & XFS_ATTR_LOCAL) { -- local = xfs_attr3_leaf_name_local(leaf, i); -- if (local->namelen == 0) { -- if (show_warnings) -- print_warning("zero length for " -- "attr name in inode %llu", -- (long long)cur_ino); -- break; -- } -- generate_obfuscated_name(0, local->namelen, -- &local->nameval[0]); -- memset(&local->nameval[local->namelen], 0, -- be16_to_cpu(local->valuelen)); -- } else { -- remote = xfs_attr3_leaf_name_remote(leaf, i); -- if (remote->namelen == 0 || -- remote->valueblk == 0) { -- if (show_warnings) -- print_warning("invalid attr " -- "entry in inode %llu", -- (long long)cur_ino); -- break; -- } -- generate_obfuscated_name(0, remote->namelen, -- &remote->name[0]); -- add_remote_vals(be32_to_cpu(remote->valueblk), -+ generate_obfuscated_name(0, local->namelen, -+ &local->nameval[0]); -+ memset(&local->nameval[local->namelen], 0, -+ be16_to_cpu(local->valuelen)); -+ } else { -+ remote = xfs_attr3_leaf_name_remote(leaf, i); -+ if (remote->namelen == 0 || remote->valueblk == 0) { -+ if (show_warnings) -+ print_warning( -+ "invalid attr entry in inode %llu", -+ (long long)cur_ino); -+ break; -+ } -+ generate_obfuscated_name(0, remote->namelen, -+ &remote->name[0]); -+ add_remote_vals(be32_to_cpu(remote->valueblk), - be32_to_cpu(remote->valuelen)); -+ } -+ } -+} -+ -+static int -+process_single_fsb_objects( -+ xfs_dfiloff_t o, -+ xfs_dfsbno_t s, -+ xfs_dfilblks_t c, -+ typnm_t btype, -+ xfs_dfiloff_t last) -+{ -+ char *dp; -+ int ret = 0; -+ int i; -+ -+ for (i = 0; i < c; i++) { -+ push_cur(); -+ set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, s), blkbb, -+ DB_RING_IGN, NULL); -+ -+ if (!iocur_top->data) { -+ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, s); -+ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, s); -+ -+ print_warning("cannot read %s 
block %u/%u (%llu)", -+ typtab[btype].name, agno, agbno, s); -+ if (stop_on_read_error) -+ ret = -EIO; -+ goto out_pop; -+ -+ } -+ -+ if (dont_obfuscate) -+ goto write; -+ -+ dp = iocur_top->data; -+ switch (btype) { -+ case TYP_DIR2: -+ if (o >= mp->m_dirleafblk) -+ break; -+ -+ obfuscate_dir_data_block(dp, o, -+ last == mp->m_dirblkfsbs); -+ iocur_top->need_crc = 1; -+ break; -+ case TYP_SYMLINK: -+ obfuscate_symlink_block(dp); -+ iocur_top->need_crc = 1; -+ break; -+ case TYP_ATTR: -+ obfuscate_attr_block(dp, o); -+ iocur_top->need_crc = 1; -+ break; -+ default: -+ break; -+ } -+ -+write: -+ ret = write_buf(iocur_top); -+out_pop: -+ pop_cur(); -+ if (ret) -+ break; -+ o++; -+ s++; -+ } -+ -+ return ret; -+} -+ -+/* -+ * Static map to aggregate multiple extents into a single directory block. -+ */ -+static struct bbmap mfsb_map; -+static int mfsb_length; -+ -+static int -+process_multi_fsb_objects( -+ xfs_dfiloff_t o, -+ xfs_dfsbno_t s, -+ xfs_dfilblks_t c, -+ typnm_t btype, -+ xfs_dfiloff_t last) -+{ -+ int ret = 0; -+ -+ switch (btype) { -+ case TYP_DIR2: -+ break; -+ default: -+ print_warning("bad type for multi-fsb object %d", btype); -+ return -EINVAL; -+ } -+ -+ while (c > 0) { -+ unsigned int bm_len; -+ -+ if (mfsb_length + c >= mp->m_dirblkfsbs) { -+ bm_len = mp->m_dirblkfsbs - mfsb_length; -+ mfsb_length = 0; -+ } else { -+ mfsb_length += c; -+ bm_len = c; -+ } -+ -+ mfsb_map.b[mfsb_map.nmaps].bm_bn = XFS_FSB_TO_DADDR(mp, s); -+ mfsb_map.b[mfsb_map.nmaps].bm_len = XFS_FSB_TO_BB(mp, bm_len); -+ mfsb_map.nmaps++; -+ -+ if (mfsb_length == 0) { -+ push_cur(); -+ set_cur(&typtab[btype], 0, 0, DB_RING_IGN, &mfsb_map); -+ if (!iocur_top->data) { -+ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, s); -+ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, s); -+ -+ print_warning("cannot read %s block %u/%u (%llu)", -+ typtab[btype].name, agno, agbno, s); -+ if (stop_on_read_error) -+ ret = -1; -+ goto out_pop; -+ -+ } -+ -+ if (dont_obfuscate || o >= mp->m_dirleafblk) { -+ ret 
= write_buf(iocur_top); -+ goto out_pop; - } -+ -+ obfuscate_dir_data_block(iocur_top->data, o, -+ last == mp->m_dirblkfsbs); -+ iocur_top->need_crc = 1; -+ ret = write_buf(iocur_top); -+out_pop: -+ pop_cur(); -+ mfsb_map.nmaps = 0; -+ if (ret) -+ break; - } -+ c -= bm_len; -+ s += bm_len; - } -+ -+ return ret; - } - - /* inode copy routines */ -- - static int - process_bmbt_reclist( - xfs_bmbt_rec_t *rp, -@@ -1346,6 +1496,7 @@ process_bmbt_reclist( - xfs_dfiloff_t last; - xfs_agnumber_t agno; - xfs_agblock_t agbno; -+ int error; - - if (btype == TYP_DATA) - return 1; -@@ -1407,44 +1558,14 @@ process_bmbt_reclist( - break; - } - -- push_cur(); -- set_cur(&typtab[btype], XFS_FSB_TO_DADDR(mp, s), c * blkbb, -- DB_RING_IGN, NULL); -- if (iocur_top->data == NULL) { -- print_warning("cannot read %s block %u/%u (%llu)", -- typtab[btype].name, agno, agbno, s); -- if (stop_on_read_error) { -- pop_cur(); -- return 0; -- } -+ /* multi-extent blocks require special handling */ -+ if (btype != TYP_DIR2 || mp->m_dirblkfsbs == 1) { -+ error = process_single_fsb_objects(o, s, c, btype, last); - } else { -- if (!dont_obfuscate) -- switch (btype) { -- case TYP_DIR2: -- if (o < mp->m_dirleafblk) -- obfuscate_dir_data_blocks( -- iocur_top->data, o, c, -- last == mp->m_dirblkfsbs); -- break; -- -- case TYP_SYMLINK: -- obfuscate_symlink_blocks( -- iocur_top->data, c); -- break; -- -- case TYP_ATTR: -- obfuscate_attr_blocks(iocur_top->data, -- o, c); -- break; -- -- default: ; -- } -- if (!write_buf(iocur_top)) { -- pop_cur(); -- return 0; -- } -+ error = process_multi_fsb_objects(o, s, c, btype, last); - } -- pop_cur(); -+ if (error) -+ return 0; - } - - return 1; -@@ -1626,6 +1747,13 @@ process_inode_data( - return 1; - } - -+/* -+ * when we process the inode, we may change the data in the data and/or -+ * attribute fork if they are in short form and we are obfuscating names. 
-+ * In this case we need to recalculate the CRC of the inode, but we should -+ * only do that if the CRC in the inode is good to begin with. If the crc -+ * is not ok, we just leave it alone. -+ */ - static int - process_inode( - xfs_agnumber_t agno, -@@ -1633,18 +1761,30 @@ process_inode( - xfs_dinode_t *dip) - { - int success; -+ bool crc_was_ok = false; /* no recalc by default */ -+ bool need_new_crc = false; - - success = 1; - cur_ino = XFS_AGINO_TO_INO(mp, agno, agino); - -+ /* we only care about crc recalculation if we are obfuscating names. */ -+ if (!dont_obfuscate) { -+ crc_was_ok = xfs_verify_cksum((char *)dip, -+ mp->m_sb.sb_inodesize, -+ offsetof(struct xfs_dinode, di_crc)); -+ } -+ - /* copy appropriate data fork metadata */ - switch (be16_to_cpu(dip->di_mode) & S_IFMT) { - case S_IFDIR: -- memset(&dir_data, 0, sizeof(dir_data)); - success = process_inode_data(dip, TYP_DIR2); -+ if (dip->di_format == XFS_DINODE_FMT_LOCAL) -+ need_new_crc = 1; - break; - case S_IFLNK: - success = process_inode_data(dip, TYP_SYMLINK); -+ if (dip->di_format == XFS_DINODE_FMT_LOCAL) -+ need_new_crc = 1; - break; - case S_IFREG: - success = process_inode_data(dip, TYP_DATA); -@@ -1659,6 +1799,7 @@ process_inode( - attr_data.remote_val_count = 0; - switch (dip->di_aformat) { - case XFS_DINODE_FMT_LOCAL: -+ need_new_crc = 1; - if (!dont_obfuscate) - obfuscate_sf_attr(dip); - break; -@@ -1673,6 +1814,9 @@ process_inode( - } - nametable_clear(); - } -+ -+ if (crc_was_ok && need_new_crc) -+ xfs_dinode_calc_crc(mp, dip); - return success; - } - -@@ -1743,12 +1887,9 @@ copy_inode_chunk( - - if (!process_inode(agno, agino + i, dip)) - goto pop_out; -- -- /* calculate the new CRC for the inode */ -- xfs_dinode_calc_crc(mp, dip); - } - skip_processing: -- if (!write_buf(iocur_top)) -+ if (write_buf(iocur_top)) - goto pop_out; - - inodes_copied += XFS_INODES_PER_CHUNK; -@@ -1866,7 +2007,7 @@ scan_ag( - if (stop_on_read_error) - goto pop_out; - } else { -- if (!write_buf(iocur_top)) 
-+ if (write_buf(iocur_top)) - goto pop_out; - } - -@@ -1881,7 +2022,7 @@ scan_ag( - if (stop_on_read_error) - goto pop_out; - } else { -- if (!write_buf(iocur_top)) -+ if (write_buf(iocur_top)) - goto pop_out; - } - -@@ -1896,7 +2037,7 @@ scan_ag( - if (stop_on_read_error) - goto pop_out; - } else { -- if (!write_buf(iocur_top)) -+ if (write_buf(iocur_top)) - goto pop_out; - } - -@@ -1910,7 +2051,7 @@ scan_ag( - if (stop_on_read_error) - goto pop_out; - } else { -- if (!write_buf(iocur_top)) -+ if (write_buf(iocur_top)) - goto pop_out; - } - -@@ -2015,7 +2156,7 @@ copy_log(void) - print_warning("cannot read log"); - return !stop_on_read_error; - } -- return write_buf(iocur_top); -+ return !write_buf(iocur_top); - } - - static int -@@ -2121,7 +2262,7 @@ metadump_f( - - /* write the remaining index */ - if (!exitcode) -- exitcode = !write_index(); -+ exitcode = write_index() < 0; - - if (progress_since_warning) - fputc('\n', (outf == stdout) ? stderr : stdout); -diff --git a/db/write.c b/db/write.c -index 091ddb3..7b34fc0 100644 ---- a/db/write.c -+++ b/db/write.c -@@ -439,55 +439,78 @@ convert_oct( - - #define NYBBLE(x) (isdigit(x)?(x-'0'):(tolower(x)-'a'+0xa)) - -+/* -+ * convert_arg allows input in the following forms: -+ * -+ * - A string ("ABTB") whose ASCII value is placed in an array in the order -+ * matching the input. -+ * -+ * - An even number of hex numbers. If the length is greater than 64 bits, -+ * then the output is an array of bytes whose top nibble is the first hex -+ * digit in the input, the lower nibble is the second hex digit in the -+ * input. UUID entries are entered in this manner. -+ * -+ * - A decimal or hexadecimal integer to be used with setbitval(). -+ * -+ * Numbers that are passed to setbitval() need to be in big endian format and -+ * are adjusted in the buffer so that the first input bit is to be be written to -+ * the first bit in the output. 
-+ */ - static char * - convert_arg( -- char *arg, -- int bit_length) -+ char *arg, -+ int bit_length) - { -- int i; -- static char *buf = NULL; -- char *rbuf; -- long long *value; -- int alloc_size; -- char *ostr; -- int octval, ret; -+ int i; -+ int alloc_size; -+ int octval; -+ int offset; -+ int ret; -+ static char *buf = NULL; -+ char *endp; -+ char *rbuf; -+ char *ostr; -+ __u64 *value; -+ __u64 val = 0; - - if (bit_length <= 64) - alloc_size = 8; - else -- alloc_size = (bit_length+7)/8; -+ alloc_size = (bit_length + 7) / 8; - - buf = xrealloc(buf, alloc_size); - memset(buf, 0, alloc_size); -- value = (long long *)buf; -+ value = (__u64 *)buf; - rbuf = buf; - - if (*arg == '\"') { -- /* handle strings */ -+ /* input a string and output ASCII array of characters */ - - /* zap closing quote if there is one */ -- if ((ostr = strrchr(arg+1, '\"')) != NULL) -+ ostr = strrchr(arg + 1, '\"'); -+ if (ostr) - *ostr = '\0'; - -- ostr = arg+1; -+ ostr = arg + 1; - for (i = 0; i < alloc_size; i++) { - if (!*ostr) - break; - -- /* do octal */ -+ /* do octal conversion */ - if (*ostr == '\\') { -- if (*(ostr+1) >= '0' || *(ostr+1) <= '7') { -- ret = convert_oct(ostr+1, &octval); -+ if (*(ostr + 1) >= '0' || *(ostr + 1) <= '7') { -+ ret = convert_oct(ostr + 1, &octval); - *rbuf++ = octval; -- ostr += ret+1; -+ ostr += ret + 1; - continue; - } - } - *rbuf++ = *ostr++; - } -- - return buf; -- } else if (arg[0] == '#' || ((arg[0] != '-') && strchr(arg,'-'))) { -+ } -+ -+ if (arg[0] == '#' || ((arg[0] != '-') && strchr(arg,'-'))) { - /* - * handle hex blocks ie - * #00112233445566778899aabbccddeeff -@@ -496,48 +519,79 @@ convert_arg( - * - * (but if it starts with "-" assume it's just an integer) - */ -- int bytes=bit_length/8; -+ int bytes = bit_length / NBBY; -+ -+ /* is this an array of hec numbers? 
*/ -+ if (bit_length % NBBY) -+ return NULL; - - /* skip leading hash */ -- if (*arg=='#') arg++; -+ if (*arg == '#') -+ arg++; - - while (*arg && bytes--) { -- /* skip hypens */ -- while (*arg=='-') arg++; -- -- /* get first nybble */ -- if (!isxdigit((int)*arg)) return NULL; -- *rbuf=NYBBLE((int)*arg)<<4; -- arg++; -- -- /* skip more hyphens */ -- while (*arg=='-') arg++; -- -- /* get second nybble */ -- if (!isxdigit((int)*arg)) return NULL; -- *rbuf++|=NYBBLE((int)*arg); -- arg++; -+ /* skip hypens */ -+ while (*arg == '-') -+ arg++; -+ -+ /* get first nybble */ -+ if (!isxdigit((int)*arg)) -+ return NULL; -+ *rbuf = NYBBLE((int)*arg) << 4; -+ arg++; -+ -+ /* skip more hyphens */ -+ while (*arg == '-') -+ arg++; -+ -+ /* get second nybble */ -+ if (!isxdigit((int)*arg)) -+ return NULL; -+ *rbuf++ |= NYBBLE((int)*arg); -+ arg++; - } -- if (bytes<0&&*arg) return NULL; -+ if (bytes < 0 && *arg) -+ return NULL; -+ - return buf; -- } else { -- /* -- * handle integers -- */ -- *value = strtoll(arg, NULL, 0); -- --#if __BYTE_ORDER == BIG_ENDIAN -- /* hackery for big endian */ -- if (bit_length <= 8) { -- rbuf += 7; -- } else if (bit_length <= 16) { -- rbuf += 6; -- } else if (bit_length <= 32) { -- rbuf += 4; -- } --#endif -- return rbuf; - } -+ -+ /* handle decimal / hexadecimal integers */ -+ val = strtoll(arg, &endp, 0); -+ /* return if not a clean number */ -+ if (*endp != '\0') -+ return NULL; -+ -+ /* Does the value fit into the range of the destination bitfield? 
*/ -+ if ((val >> bit_length) > 0) -+ return NULL; -+ /* -+ * If the length of the field is not a multiple of a byte, push -+ * the bits up in the field, so the most signicant field bit is -+ * the most significant bit in the byte: -+ * -+ * before: -+ * val |----|----|----|----|----|--MM|mmmm|llll| -+ * after -+ * val |----|----|----|----|----|MMmm|mmll|ll00| -+ */ -+ offset = bit_length % NBBY; -+ if (offset) -+ val <<= (NBBY - offset); -+ -+ /* -+ * convert to big endian and copy into the array -+ * rbuf |----|----|----|----|----|MMmm|mmll|ll00| -+ */ -+ *value = cpu_to_be64(val); -+ -+ /* -+ * Align the array to point to the field in the array. -+ * rbuf = |MMmm|mmll|ll00| -+ */ -+ offset = sizeof(__be64) - 1 - ((bit_length - 1) / sizeof(__be64)); -+ rbuf += offset; -+ return rbuf; - } - - -@@ -550,9 +604,9 @@ write_struct( - { - const ftattr_t *fa; - flist_t *fl; -- flist_t *sfl; -- int bit_length; -- char *buf; -+ flist_t *sfl; -+ int bit_length; -+ char *buf; - int parentoffset; - - if (argc != 2) { -diff --git a/db/xfs_metadump.sh b/db/xfs_metadump.sh -index 28b04b8..a95d5a5 100755 ---- a/db/xfs_metadump.sh -+++ b/db/xfs_metadump.sh -@@ -5,9 +5,9 @@ - - OPTS=" " - DBOPTS=" " --USAGE="Usage: xfs_metadump [-efogwV] [-m max_extents] [-l logdev] source target" -+USAGE="Usage: xfs_metadump [-efFogwV] [-m max_extents] [-l logdev] source target" - --while getopts "efgl:m:owV" c -+while getopts "efgl:m:owFV" c - do - case $c in - e) OPTS=$OPTS"-e ";; -@@ -17,6 +17,7 @@ do - w) OPTS=$OPTS"-w ";; - f) DBOPTS=$DBOPTS" -f";; - l) DBOPTS=$DBOPTS" -l "$OPTARG" ";; -+ F) DBOPTS=$DBOPTS" -F";; - V) xfs_db -p xfs_metadump -V - status=$? - exit $status -@@ -29,7 +30,7 @@ done - set -- extra $@ - shift $OPTIND - case $# in -- 2) xfs_db$DBOPTS -F -i -p xfs_metadump -c "metadump$OPTS $2" $1 -+ 2) xfs_db$DBOPTS -i -p xfs_metadump -c "metadump$OPTS $2" $1 - status=$? 
- ;; - *) echo $USAGE 1>&2 -diff --git a/growfs/xfs_growfs.c b/growfs/xfs_growfs.c -index 2df68fb..fb7eda8 100644 ---- a/growfs/xfs_growfs.c -+++ b/growfs/xfs_growfs.c -@@ -189,7 +189,7 @@ main(int argc, char **argv) - usage(); - if (iflag && xflag) - usage(); -- if (dflag + lflag + rflag == 0) -+ if (dflag + lflag + rflag + mflag == 0) - aflag = 1; - - fs_table_initialise(0, NULL, 0, NULL); -@@ -305,12 +305,15 @@ main(int argc, char **argv) - drsize -= (drsize % 2); - - error = 0; -- if (dflag | aflag) { -+ -+ if (dflag | mflag | aflag) { - xfs_growfs_data_t in; - - if (!mflag) - maxpct = geo.imaxpct; -- if (!dsize) -+ if (!dflag && !aflag) /* Only mflag, no data size change */ -+ dsize = geo.datablocks; -+ else if (!dsize) - dsize = ddsize / (geo.blocksize / BBSIZE); - else if (dsize > ddsize / (geo.blocksize / BBSIZE)) { - fprintf(stderr, _( -diff --git a/include/cache.h b/include/cache.h -index 76cb234..0a84c69 100644 ---- a/include/cache.h -+++ b/include/cache.h -@@ -66,7 +66,8 @@ typedef void (*cache_walk_t)(struct cache_node *); - typedef struct cache_node * (*cache_node_alloc_t)(cache_key_t); - typedef void (*cache_node_flush_t)(struct cache_node *); - typedef void (*cache_node_relse_t)(struct cache_node *); --typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int); -+typedef unsigned int (*cache_node_hash_t)(cache_key_t, unsigned int, -+ unsigned int); - typedef int (*cache_node_compare_t)(struct cache_node *, cache_key_t); - typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *); - -@@ -112,6 +113,7 @@ struct cache { - cache_node_compare_t compare; /* comparison routine */ - cache_bulk_relse_t bulkrelse; /* bulk release routine */ - unsigned int c_hashsize; /* hash bucket count */ -+ unsigned int c_hashshift; /* hash key shift */ - struct cache_hash *c_hash; /* hash table buckets */ - struct cache_mru c_mrus[CACHE_MAX_PRIORITY + 1]; - unsigned long long c_misses; /* cache misses */ -diff --git a/include/darwin.h 
b/include/darwin.h -index 97b8990..95f865b 100644 ---- a/include/darwin.h -+++ b/include/darwin.h -@@ -150,6 +150,7 @@ typedef unsigned char uchar_t; - - #define ENOATTR 989 /* Attribute not found */ - #define EFSCORRUPTED 990 /* Filesystem is corrupted */ -+#define EFSBADCRC 991 /* Bad CRC detected */ - #define constpp char * const * - - #define HAVE_FID 1 -diff --git a/include/freebsd.h b/include/freebsd.h -index 2e1ae49..b51688b 100644 ---- a/include/freebsd.h -+++ b/include/freebsd.h -@@ -45,6 +45,7 @@ - #define constpp char * const * - - #define EFSCORRUPTED 990 /* Filesystem is corrupted */ -+#define EFSBADCRC 991 /* Bad CRC detected */ - - typedef off_t xfs_off_t; - typedef off_t off64_t; -diff --git a/include/gnukfreebsd.h b/include/gnukfreebsd.h -index 1ec291f..2140acd 100644 ---- a/include/gnukfreebsd.h -+++ b/include/gnukfreebsd.h -@@ -36,6 +36,7 @@ - #define constpp char * const * - - #define EFSCORRUPTED 990 /* Filesystem is corrupted */ -+#define EFSBADCRC 991 /* Bad CRC detected */ - - typedef off_t xfs_off_t; - typedef __uint64_t xfs_ino_t; -diff --git a/include/irix.h b/include/irix.h -index a450684..5040451 100644 ---- a/include/irix.h -+++ b/include/irix.h -@@ -52,6 +52,8 @@ typedef char* xfs_caddr_t; - #define xfs_flock64 flock64 - #define xfs_flock64_t struct flock64 - -+#define EFSBADCRC 991 /* Bad CRC detected */ -+ - typedef struct xfs_error_injection { - __int32_t fd; - __int32_t errtag; -diff --git a/include/libxfs.h b/include/libxfs.h -index 4bf331c..6bc6c94 100644 ---- a/include/libxfs.h -+++ b/include/libxfs.h -@@ -144,6 +144,7 @@ extern void libxfs_device_close (dev_t); - extern int libxfs_device_alignment (void); - extern void libxfs_report(FILE *); - extern void platform_findsizes(char *path, int fd, long long *sz, int *bsz); -+extern int platform_nproc(void); - - /* check or write log footer: specify device, log size in blocks & uuid */ - typedef xfs_caddr_t (libxfs_get_block_t)(xfs_caddr_t, int, void *); -@@ -364,7 +365,7 @@ enum 
xfs_buf_flags_t { /* b_flags bits */ - #define XFS_BUF_PRIORITY(bp) (cache_node_get_priority( \ - (struct cache_node *)(bp))) - #define xfs_buf_set_ref(bp,ref) ((void) 0) --#define xfs_buf_ioerror(bp,err) (bp)->b_error = (err); -+#define xfs_buf_ioerror(bp,err) ((bp)->b_error = (err)) - - #define xfs_daddr_to_agno(mp,d) \ - ((xfs_agnumber_t)(XFS_BB_TO_FSBT(mp, d) / (mp)->m_sb.sb_agblocks)) -@@ -392,9 +393,9 @@ extern struct cache_operations libxfs_bcache_operations; - #define libxfs_getbuf(dev, daddr, len) \ - libxfs_trace_getbuf(__FUNCTION__, __FILE__, __LINE__, \ - (dev), (daddr), (len)) --#define libxfs_getbuf_map(dev, map, nmaps) \ -+#define libxfs_getbuf_map(dev, map, nmaps, flags) \ - libxfs_trace_getbuf_map(__FUNCTION__, __FILE__, __LINE__, \ -- (dev), (map), (nmaps)) -+ (dev), (map), (nmaps), (flags)) - #define libxfs_getbuf_flags(dev, daddr, len, flags) \ - libxfs_trace_getbuf_flags(__FUNCTION__, __FILE__, __LINE__, \ - (dev), (daddr), (len), (flags)) -@@ -412,7 +413,7 @@ extern int libxfs_trace_writebuf(const char *, const char *, int, - extern xfs_buf_t *libxfs_trace_getbuf(const char *, const char *, int, - struct xfs_buftarg *, xfs_daddr_t, int); - extern xfs_buf_t *libxfs_trace_getbuf_map(const char *, const char *, int, -- struct xfs_buftarg *, struct xfs_buf_map *, int); -+ struct xfs_buftarg *, struct xfs_buf_map *, int, int); - extern xfs_buf_t *libxfs_trace_getbuf_flags(const char *, const char *, int, - struct xfs_buftarg *, xfs_daddr_t, int, unsigned int); - extern void libxfs_trace_putbuf (const char *, const char *, int, -@@ -427,7 +428,7 @@ extern xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, - extern int libxfs_writebuf(xfs_buf_t *, int); - extern xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int); - extern xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, -- struct xfs_buf_map *, int); -+ struct xfs_buf_map *, int, int); - extern xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, - 
int, unsigned int); - extern void libxfs_putbuf (xfs_buf_t *); -@@ -448,8 +449,7 @@ extern void libxfs_putbufr(xfs_buf_t *); - extern int libxfs_writebuf_int(xfs_buf_t *, int); - extern int libxfs_writebufr(struct xfs_buf *); - extern int libxfs_readbufr(struct xfs_buftarg *, xfs_daddr_t, xfs_buf_t *, int, int); --extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, -- struct xfs_buf_map *, int, int); -+extern int libxfs_readbufr_map(struct xfs_buftarg *, struct xfs_buf *, int); - - extern int libxfs_bhash_size; - -@@ -779,6 +779,20 @@ extern uint32_t crc32c_le(uint32_t crc, unsigned char const *p, size_t len); - - #include - -+static inline int -+xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset) -+{ -+ return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -+ cksum_offset); -+} -+ -+static inline void -+xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) -+{ -+ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -+ cksum_offset); -+} -+ - #define xfs_notice(mp,fmt,args...) cmn_err(CE_NOTE,fmt, ## args) - #define xfs_warn(mp,fmt,args...) cmn_err(CE_WARN,fmt, ## args) - #define xfs_alert(mp,fmt,args...) 
cmn_err(CE_ALERT,fmt, ## args) -diff --git a/include/linux.h b/include/linux.h -index 502fd1f..5586290 100644 ---- a/include/linux.h -+++ b/include/linux.h -@@ -136,6 +136,7 @@ platform_discard_blocks(int fd, uint64_t start, uint64_t len) - - #define ENOATTR ENODATA /* Attribute not found */ - #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ -+#define EFSBADCRC EBADMSG /* Bad CRC detected */ - - typedef loff_t xfs_off_t; - typedef __uint64_t xfs_ino_t; -diff --git a/include/xfs_ag.h b/include/xfs_ag.h -index 3fc1098..0fdd410 100644 ---- a/include/xfs_ag.h -+++ b/include/xfs_ag.h -@@ -89,6 +89,8 @@ typedef struct xfs_agf { - /* structure must be padded to 64 bit alignment */ - } xfs_agf_t; - -+#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc) -+ - #define XFS_AGF_MAGICNUM 0x00000001 - #define XFS_AGF_VERSIONNUM 0x00000002 - #define XFS_AGF_SEQNO 0x00000004 -@@ -167,6 +169,8 @@ typedef struct xfs_agi { - /* structure must be padded to 64 bit alignment */ - } xfs_agi_t; - -+#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) -+ - #define XFS_AGI_MAGICNUM 0x00000001 - #define XFS_AGI_VERSIONNUM 0x00000002 - #define XFS_AGI_SEQNO 0x00000004 -@@ -222,6 +226,8 @@ typedef struct xfs_agfl { - __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ - } xfs_agfl_t; - -+#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) -+ - /* - * tags for inode radix tree - */ -diff --git a/include/xfs_dinode.h b/include/xfs_dinode.h -index e5869b5..623bbe8 100644 ---- a/include/xfs_dinode.h -+++ b/include/xfs_dinode.h -@@ -89,6 +89,8 @@ typedef struct xfs_dinode { - /* structure must be padded to 64 bit alignment */ - } xfs_dinode_t; - -+#define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) -+ - #define DI_MAX_FLUSH 0xffff - - /* -diff --git a/include/xfs_dir2.h b/include/xfs_dir2.h -index 9910401..3900130 100644 ---- a/include/xfs_dir2.h -+++ b/include/xfs_dir2.h -@@ -57,6 +57,9 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, 
- extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name, uint resblks); - -+#define S_SHIFT 12 -+extern const unsigned char xfs_mode_to_ftype[]; -+ - /* - * Direct call from the bmap code, bypassing the generic directory layer. - */ -diff --git a/include/xfs_format.h b/include/xfs_format.h -index 997c770..77f6b8b 100644 ---- a/include/xfs_format.h -+++ b/include/xfs_format.h -@@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr { - __be64 sl_lsn; - }; - -+#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc) -+ - /* - * The maximum pathlen is 1024 bytes. Since the minimum file system - * blocksize is 512 bytes, we can get a max of 3 extents back from -diff --git a/include/xfs_sb.h b/include/xfs_sb.h -index 35061d4..f7b2fe7 100644 ---- a/include/xfs_sb.h -+++ b/include/xfs_sb.h -@@ -182,6 +182,8 @@ typedef struct xfs_sb { - /* must be padded to 64 bit alignment */ - } xfs_sb_t; - -+#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc) -+ - /* - * Superblock - on disk version. Must match the in core version above. - * Must be padded to 64 bit alignment. -diff --git a/io/file.c b/io/file.c -index db85ffc..73b893f 100644 ---- a/io/file.c -+++ b/io/file.c -@@ -36,7 +36,7 @@ print_fileio( - int index, - int braces) - { -- printf(_("%c%03d%c %-14s (%s,%s,%s,%s%s%s%s)\n"), -+ printf(_("%c%03d%c %-14s (%s,%s,%s,%s%s%s%s%s)\n"), - braces? '[' : ' ', index, braces? ']' : ' ', file->name, - file->flags & IO_FOREIGN ? _("foreign") : _("xfs"), - file->flags & IO_OSYNC ? _("sync") : _("non-sync"), -@@ -44,7 +44,8 @@ print_fileio( - file->flags & IO_READONLY ? _("read-only") : _("read-write"), - file->flags & IO_REALTIME ? _(",real-time") : "", - file->flags & IO_APPEND ? _(",append-only") : "", -- file->flags & IO_NONBLOCK ? _(",non-block") : ""); -+ file->flags & IO_NONBLOCK ? _(",non-block") : "", -+ file->flags & IO_TMPFILE ? 
_(",tmpfile") : ""); - } - - int -diff --git a/io/imap.c b/io/imap.c -index 0a4f14e..4f3f883 100644 ---- a/io/imap.c -+++ b/io/imap.c -@@ -67,7 +67,7 @@ imap_init(void) - imap_cmd.name = "imap"; - imap_cmd.cfunc = imap_f; - imap_cmd.argmin = 0; -- imap_cmd.argmax = 0; -+ imap_cmd.argmax = 1; - imap_cmd.args = _("[nentries]"); - imap_cmd.flags = CMD_NOMAP_OK; - imap_cmd.oneline = _("inode map for filesystem of current file"); -diff --git a/io/init.c b/io/init.c -index ef9e4cb..1e2690e 100644 ---- a/io/init.c -+++ b/io/init.c -@@ -136,7 +136,7 @@ init( - pagesize = getpagesize(); - gettimeofday(&stopwatch, NULL); - -- while ((c = getopt(argc, argv, "ac:dFfmp:nrRstVx")) != EOF) { -+ while ((c = getopt(argc, argv, "ac:dFfmp:nrRstTVx")) != EOF) { - switch (c) { - case 'a': - flags |= IO_APPEND; -@@ -179,6 +179,9 @@ init( - case 'R': - flags |= IO_REALTIME; - break; -+ case 'T': -+ flags |= IO_TMPFILE; -+ break; - case 'x': - expert = 1; - break; -diff --git a/io/io.h b/io/io.h -index 6c3f627..0d2d768 100644 ---- a/io/io.h -+++ b/io/io.h -@@ -35,6 +35,7 @@ - #define IO_TRUNC (1<<6) - #define IO_FOREIGN (1<<7) - #define IO_NONBLOCK (1<<8) -+#define IO_TMPFILE (1<<9) - - /* - * Regular file I/O control -diff --git a/io/open.c b/io/open.c -index cc677e6..c106fa7 100644 ---- a/io/open.c -+++ b/io/open.c -@@ -22,6 +22,22 @@ - #include "init.h" - #include "io.h" - -+#ifndef __O_TMPFILE -+#if defined __alpha__ -+#define __O_TMPFILE 0100000000 -+#elif defined(__hppa__) -+#define __O_TMPFILE 040000000 -+#elif defined(__sparc__) -+#define __O_TMPFILE 0x2000000 -+#else -+#define __O_TMPFILE 020000000 -+#endif -+#endif /* __O_TMPFILE */ -+ -+#ifndef O_TMPFILE -+#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) -+#endif -+ - static cmdinfo_t open_cmd; - static cmdinfo_t stat_cmd; - static cmdinfo_t close_cmd; -@@ -77,13 +93,14 @@ stat_f( - int verbose = (argc == 2 && !strcmp(argv[1], "-v")); - - printf(_("fd.path = \"%s\"\n"), file->name); -- printf(_("fd.flags = %s,%s,%s%s%s%s\n"), -+ 
printf(_("fd.flags = %s,%s,%s%s%s%s%s\n"), - file->flags & IO_OSYNC ? _("sync") : _("non-sync"), - file->flags & IO_DIRECT ? _("direct") : _("non-direct"), - file->flags & IO_READONLY ? _("read-only") : _("read-write"), - file->flags & IO_REALTIME ? _(",real-time") : "", - file->flags & IO_APPEND ? _(",append-only") : "", -- file->flags & IO_NONBLOCK ? _(",non-block") : ""); -+ file->flags & IO_NONBLOCK ? _(",non-block") : "", -+ file->flags & IO_TMPFILE ? _(",tmpfile") : ""); - if (fstat64(file->fd, &st) < 0) { - perror("fstat64"); - } else { -@@ -143,10 +160,13 @@ openfile( - oflags |= O_TRUNC; - if (flags & IO_NONBLOCK) - oflags |= O_NONBLOCK; -+ if (flags & IO_TMPFILE) -+ oflags |= O_TMPFILE; - - fd = open(path, oflags, mode); - if (fd < 0) { -- if ((errno == EISDIR) && (oflags & O_RDWR)) { -+ if (errno == EISDIR && -+ ((oflags & (O_RDWR|O_TMPFILE)) == O_RDWR)) { - /* make it as if we asked for O_RDONLY & try again */ - oflags &= ~O_RDWR; - oflags |= O_RDONLY; -@@ -248,6 +268,7 @@ open_help(void) - " -s -- open with O_SYNC\n" - " -t -- open with O_TRUNC (truncate the file to zero length if it exists)\n" - " -R -- mark the file as a realtime XFS file immediately after opening it\n" -+" -T -- open with O_TMPFILE (create a file not visible in the namespace)\n" - " Note1: usually read/write direct IO requests must be blocksize aligned;\n" - " some kernels, however, allow sectorsize alignment for direct IO.\n" - " Note2: the bmap for non-regular files can be obtained provided the file\n" -@@ -272,7 +293,7 @@ open_f( - return 0; - } - -- while ((c = getopt(argc, argv, "FRacdfm:nrstx")) != EOF) { -+ while ((c = getopt(argc, argv, "FRTacdfm:nrstx")) != EOF) { - switch (c) { - case 'F': - /* Ignored / deprecated now, handled automatically */ -@@ -310,6 +331,9 @@ open_f( - case 'x': /* backwards compatibility */ - flags |= IO_REALTIME; - break; -+ case 'T': -+ flags |= IO_TMPFILE; -+ break; - default: - return command_usage(&open_cmd); - } -@@ -318,6 +342,11 @@ open_f( - 
if (optind != argc - 1) - return command_usage(&open_cmd); - -+ if ((flags & (IO_READONLY|IO_TMPFILE)) == (IO_READONLY|IO_TMPFILE)) { -+ fprintf(stderr, _("-T and -r options are incompatible\n")); -+ return -1; -+ } -+ - fd = openfile(argv[optind], &geometry, flags, mode); - if (fd < 0) - return 0; -@@ -731,7 +760,7 @@ open_init(void) - open_cmd.argmin = 0; - open_cmd.argmax = -1; - open_cmd.flags = CMD_NOMAP_OK | CMD_NOFILE_OK | CMD_FOREIGN_OK; -- open_cmd.args = _("[-acdrstx] [path]"); -+ open_cmd.args = _("[-acdrstxT] [path]"); - open_cmd.oneline = _("open the file specified by path"); - open_cmd.help = open_help; - -diff --git a/io/prealloc.c b/io/prealloc.c -index 8380646..aba6b44 100644 ---- a/io/prealloc.c -+++ b/io/prealloc.c -@@ -29,6 +29,14 @@ - #define FALLOC_FL_PUNCH_HOLE 0x02 - #endif - -+#ifndef FALLOC_FL_COLLAPSE_RANGE -+#define FALLOC_FL_COLLAPSE_RANGE 0x08 -+#endif -+ -+#ifndef FALLOC_FL_ZERO_RANGE -+#define FALLOC_FL_ZERO_RANGE 0x10 -+#endif -+ - static cmdinfo_t allocsp_cmd; - static cmdinfo_t freesp_cmd; - static cmdinfo_t resvsp_cmd; -@@ -37,6 +45,8 @@ static cmdinfo_t zero_cmd; - #if defined(HAVE_FALLOCATE) - static cmdinfo_t falloc_cmd; - static cmdinfo_t fpunch_cmd; -+static cmdinfo_t fcollapse_cmd; -+static cmdinfo_t fzero_cmd; - #endif - - static int -@@ -159,8 +169,11 @@ fallocate_f( - int mode = 0; - int c; - -- while ((c = getopt(argc, argv, "kp")) != EOF) { -+ while ((c = getopt(argc, argv, "ckp")) != EOF) { - switch (c) { -+ case 'c': -+ mode = FALLOC_FL_COLLAPSE_RANGE; -+ break; - case 'k': - mode = FALLOC_FL_KEEP_SIZE; - break; -@@ -203,6 +216,50 @@ fpunch_f( - } - return 0; - } -+ -+static int -+fcollapse_f( -+ int argc, -+ char **argv) -+{ -+ xfs_flock64_t segment; -+ int mode = FALLOC_FL_COLLAPSE_RANGE; -+ -+ if (!offset_length(argv[1], argv[2], &segment)) -+ return 0; -+ -+ if (fallocate(file->fd, mode, -+ segment.l_start, segment.l_len)) { -+ perror("fallocate"); -+ return 0; -+ } -+ return 0; -+} -+ -+static int -+fzero_f( -+ 
int argc, -+ char **argv) -+{ -+ xfs_flock64_t segment; -+ int mode = FALLOC_FL_ZERO_RANGE; -+ int index = 1; -+ -+ if (strncmp(argv[index], "-k", 3) == 0) { -+ mode |= FALLOC_FL_KEEP_SIZE; -+ index++; -+ } -+ -+ if (!offset_length(argv[index], argv[index + 1], &segment)) -+ return 0; -+ -+ if (fallocate(file->fd, mode, -+ segment.l_start, segment.l_len)) { -+ perror("fallocate"); -+ return 0; -+ } -+ return 0; -+} - #endif /* HAVE_FALLOCATE */ - - void -@@ -263,9 +320,9 @@ prealloc_init(void) - falloc_cmd.argmin = 2; - falloc_cmd.argmax = -1; - falloc_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; -- falloc_cmd.args = _("[-k] [-p] off len"); -+ falloc_cmd.args = _("[-c] [-k] [-p] off len"); - falloc_cmd.oneline = -- _("allocates space associated with part of a file via fallocate"); -+ _("allocates space associated with part of a file via fallocate"); - add_command(&falloc_cmd); - - fpunch_cmd.name = "fpunch"; -@@ -275,7 +332,27 @@ prealloc_init(void) - fpunch_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; - fpunch_cmd.args = _("off len"); - fpunch_cmd.oneline = -- _("de-allocates space assocated with part of a file via fallocate"); -+ _("de-allocates space assocated with part of a file via fallocate"); - add_command(&fpunch_cmd); -+ -+ fcollapse_cmd.name = "fcollapse"; -+ fcollapse_cmd.cfunc = fcollapse_f; -+ fcollapse_cmd.argmin = 2; -+ fcollapse_cmd.argmax = 2; -+ fcollapse_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; -+ fcollapse_cmd.args = _("off len"); -+ fcollapse_cmd.oneline = -+ _("de-allocates space and eliminates the hole by shifting extents"); -+ add_command(&fcollapse_cmd); -+ -+ fzero_cmd.name = "fzero"; -+ fzero_cmd.cfunc = fzero_f; -+ fzero_cmd.argmin = 2; -+ fzero_cmd.argmax = 3; -+ fzero_cmd.flags = CMD_NOMAP_OK | CMD_FOREIGN_OK; -+ fzero_cmd.args = _("[-k] off len"); -+ fzero_cmd.oneline = -+ _("zeroes space and eliminates holes by preallocating"); -+ add_command(&fzero_cmd); - #endif /* HAVE_FALLOCATE */ - } -diff --git a/libxfs/cache.c b/libxfs/cache.c 
-index 84d2860..dc69689 100644 ---- a/libxfs/cache.c -+++ b/libxfs/cache.c -@@ -25,6 +25,7 @@ - #include - #include - #include -+#include - - #define CACHE_DEBUG 1 - #undef CACHE_DEBUG -@@ -61,6 +62,7 @@ cache_init( - cache->c_misses = 0; - cache->c_maxcount = maxcount; - cache->c_hashsize = hashsize; -+ cache->c_hashshift = libxfs_highbit32(hashsize); - cache->hash = cache_operations->hash; - cache->alloc = cache_operations->alloc; - cache->flush = cache_operations->flush; -@@ -343,7 +345,7 @@ cache_node_get( - int priority = 0; - int purged = 0; - -- hashidx = cache->hash(key, cache->c_hashsize); -+ hashidx = cache->hash(key, cache->c_hashsize, cache->c_hashshift); - hash = cache->c_hash + hashidx; - head = &hash->ch_list; - -@@ -515,7 +517,8 @@ cache_node_purge( - struct cache_hash * hash; - int count = -1; - -- hash = cache->c_hash + cache->hash(key, cache->c_hashsize); -+ hash = cache->c_hash + cache->hash(key, cache->c_hashsize, -+ cache->c_hashshift); - head = &hash->ch_list; - pthread_mutex_lock(&hash->ch_mutex); - for (pos = head->next, n = pos->next; pos != head; -diff --git a/libxfs/init.h b/libxfs/init.h -index f0b8cb6..112febb 100644 ---- a/libxfs/init.h -+++ b/libxfs/init.h -@@ -31,7 +31,6 @@ extern char *platform_findrawpath (char *path); - extern char *platform_findblockpath (char *path); - extern int platform_direct_blockdev (void); - extern int platform_align_blockdev (void); --extern int platform_nproc(void); - extern unsigned long platform_physmem(void); /* in kilobytes */ - extern int platform_has_uuid; - -diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c -index 0219a08..1b691fb 100644 ---- a/libxfs/rdwr.c -+++ b/libxfs/rdwr.c -@@ -203,7 +203,8 @@ xfs_buf_t *libxfs_readbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, - int, int, const struct xfs_buf_ops *); - int libxfs_writebuf(xfs_buf_t *, int); - xfs_buf_t *libxfs_getbuf(struct xfs_buftarg *, xfs_daddr_t, int); --xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, int); 
-+xfs_buf_t *libxfs_getbuf_map(struct xfs_buftarg *, struct xfs_buf_map *, -+ int, int); - xfs_buf_t *libxfs_getbuf_flags(struct xfs_buftarg *, xfs_daddr_t, int, - unsigned int); - void libxfs_putbuf (xfs_buf_t *); -@@ -255,9 +256,10 @@ libxfs_trace_getbuf(const char *func, const char *file, int line, - - xfs_buf_t * - libxfs_trace_getbuf_map(const char *func, const char *file, int line, -- struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps) -+ struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, -+ int flags) - { -- xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps); -+ xfs_buf_t *bp = libxfs_getbuf_map(btp, map, nmaps, flags); - __add_trace(bp, func, file, line); - return bp; - } -@@ -311,10 +313,18 @@ struct xfs_bufkey { - int nmaps; - }; - -+/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ -+#define GOLDEN_RATIO_PRIME 0x9e37fffffffc0001UL -+#define CACHE_LINE_SIZE 64 - static unsigned int --libxfs_bhash(cache_key_t key, unsigned int hashsize) -+libxfs_bhash(cache_key_t key, unsigned int hashsize, unsigned int hashshift) - { -- return (((unsigned int)((struct xfs_bufkey *)key)->blkno) >> 5) % hashsize; -+ uint64_t hashval = ((struct xfs_bufkey *)key)->blkno; -+ uint64_t tmp; -+ -+ tmp = hashval ^ (GOLDEN_RATIO_PRIME + hashval) / CACHE_LINE_SIZE; -+ tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> hashshift); -+ return tmp % hashsize; - } - - static int -@@ -582,11 +592,16 @@ libxfs_getbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len) - } - - struct xfs_buf * --libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps) -+libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, -+ int nmaps, int flags) - { - struct xfs_bufkey key = {0}; - int i; - -+ if (nmaps == 1) -+ return libxfs_getbuf_flags(btp, map[0].bm_bn, map[0].bm_len, -+ flags); -+ - key.buftarg = btp; - key.blkno = map[0].bm_bn; - for (i = 0; i < nmaps; i++) { -@@ -595,7 +610,7 @@ libxfs_getbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int 
nmaps) - key.map = map; - key.nmaps = nmaps; - -- return __cache_lookup(&key, 0); -+ return __cache_lookup(&key, flags); - } - - void -@@ -724,27 +739,19 @@ libxfs_readbuf(struct xfs_buftarg *btp, xfs_daddr_t blkno, int len, int flags, - } - - int --libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, -- struct xfs_buf_map *map, int nmaps, int flags) -+libxfs_readbufr_map(struct xfs_buftarg *btp, struct xfs_buf *bp, int flags) - { - int fd = libxfs_device_to_fd(btp->dev); - int error = 0; - char *buf; - int i; - -- ASSERT(BBTOB(len) <= bp->b_bcount); -- -- ASSERT(bp->b_nmaps == nmaps); -- - fd = libxfs_device_to_fd(btp->dev); - buf = bp->b_addr; - for (i = 0; i < bp->b_nmaps; i++) { - off64_t offset = LIBXFS_BBTOOFF64(bp->b_map[i].bm_bn); - int len = BBTOB(bp->b_map[i].bm_len); - -- ASSERT(bp->b_map[i].bm_bn == map[i].bm_bn); -- ASSERT(bp->b_map[i].bm_len == map[i].bm_len); -- - error = __read_buf(fd, buf, len, offset, flags); - if (error) { - bp->b_error = error; -@@ -775,7 +782,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, - return libxfs_readbuf(btp, map[0].bm_bn, map[0].bm_len, - flags, ops); - -- bp = libxfs_getbuf_map(btp, map, nmaps); -+ bp = libxfs_getbuf_map(btp, map, nmaps, 0); - if (!bp) - return NULL; - -@@ -784,7 +791,7 @@ libxfs_readbuf_map(struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, - if ((bp->b_flags & (LIBXFS_B_UPTODATE|LIBXFS_B_DIRTY))) - return bp; - -- error = libxfs_readbufr_map(btp, bp, map, nmaps, flags); -+ error = libxfs_readbufr_map(btp, bp, flags); - if (!error) { - bp->b_flags |= LIBXFS_B_UPTODATE; - if (bp->b_ops) -@@ -891,6 +898,11 @@ libxfs_writebufr(xfs_buf_t *bp) - int - libxfs_writebuf_int(xfs_buf_t *bp, int flags) - { -+ /* -+ * Clear any error hanging over from reading the buffer. This prevents -+ * subsequent reads after this write from seeing stale errors. 
-+ */ -+ bp->b_error = 0; - bp->b_flags |= (LIBXFS_B_DIRTY | flags); - return 0; - } -@@ -904,6 +916,11 @@ libxfs_writebuf(xfs_buf_t *bp, int flags) - (long long)LIBXFS_BBTOOFF64(bp->b_bn), - (long long)bp->b_bn); - #endif -+ /* -+ * Clear any error hanging over from reading the buffer. This prevents -+ * subsequent reads after this write from seeing stale errors. -+ */ -+ bp->b_error = 0; - bp->b_flags |= (LIBXFS_B_DIRTY | flags); - libxfs_putbuf(bp); - return 0; -diff --git a/libxfs/trans.c b/libxfs/trans.c -index 6a05673..c443863 100644 ---- a/libxfs/trans.c -+++ b/libxfs/trans.c -@@ -511,7 +511,7 @@ libxfs_trans_get_buf_map( - xfs_buf_log_item_t *bip; - - if (tp == NULL) -- return libxfs_getbuf_map(btp, map, nmaps); -+ return libxfs_getbuf_map(btp, map, nmaps, 0); - - bp = xfs_trans_buf_item_match(tp, btp, map, nmaps); - if (bp != NULL) { -@@ -522,7 +522,7 @@ libxfs_trans_get_buf_map( - return bp; - } - -- bp = libxfs_getbuf_map(btp, map, nmaps); -+ bp = libxfs_getbuf_map(btp, map, nmaps, 0); - if (bp == NULL) - return NULL; - #ifdef XACT_DEBUG -@@ -694,7 +694,6 @@ inode_item_done( - xfs_mount_t *mp; - xfs_buf_t *bp; - int error; -- extern kmem_zone_t *xfs_ili_zone; - - ip = iip->ili_inode; - mp = iip->ili_item.li_mountp; -@@ -736,15 +735,9 @@ ili_done: - if (iip->ili_lock_flags) { - iip->ili_lock_flags = 0; - return; -- } else { -- libxfs_iput(ip, 0); - } -- -- if (ip->i_itemp) -- kmem_zone_free(xfs_ili_zone, ip->i_itemp); -- else -- ASSERT(0); -- ip->i_itemp = NULL; -+ /* free the inode */ -+ libxfs_iput(ip, 0); - } - - static void -diff --git a/libxfs/util.c b/libxfs/util.c -index 8109ab3..1b05540 100644 ---- a/libxfs/util.c -+++ b/libxfs/util.c -@@ -730,3 +730,16 @@ cmn_err(int level, char *fmt, ...) - fputs("\n", stderr); - va_end(ap); - } -+ -+/* -+ * Warnings specifically for verifier errors. Differentiate CRC vs. invalid -+ * values, and omit the stack trace unless the error level is tuned high. 
-+ */ -+void -+xfs_verifier_error( -+ struct xfs_buf *bp) -+{ -+ xfs_alert(NULL, "Metadata %s detected at block 0x%llx/0x%x", -+ bp->b_error == EFSBADCRC ? "CRC error" : "corruption", -+ bp->b_bn, BBTOB(bp->b_length)); -+} -diff --git a/libxfs/xfs.h b/libxfs/xfs.h -index 364fd83..5a21590 100644 ---- a/libxfs/xfs.h -+++ b/libxfs/xfs.h -@@ -449,3 +449,4 @@ int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); - void xfs_trans_mod_sb(xfs_trans_t *, uint, long); - void xfs_trans_init(struct xfs_mount *); - int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); -+void xfs_verifier_error(struct xfs_buf *bp); -diff --git a/libxfs/xfs_alloc.c b/libxfs/xfs_alloc.c -index e4fb1ad..6c82be0 100644 ---- a/libxfs/xfs_alloc.c -+++ b/libxfs/xfs_alloc.c -@@ -452,7 +452,6 @@ xfs_agfl_read_verify( - struct xfs_buf *bp) - { - struct xfs_mount *mp = bp->b_target->bt_mount; -- int agfl_ok = 1; - - /* - * There is no verification of non-crc AGFLs because mkfs does not -@@ -463,15 +462,13 @@ xfs_agfl_read_verify( - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - -- agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- offsetof(struct xfs_agfl, agfl_crc)); -- -- agfl_ok = agfl_ok && xfs_agfl_verify(bp); -- -- if (!agfl_ok) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -+ if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_agfl_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -- } -+ -+ if (bp->b_error) -+ xfs_verifier_error(bp); - } - - static void -@@ -486,16 +483,15 @@ xfs_agfl_write_verify( - return; - - if (!xfs_agfl_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - - if (bip) - XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); - -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -- offsetof(struct xfs_agfl, agfl_crc)); -+ xfs_buf_update_cksum(bp, 
XFS_AGFL_CRC_OFF); - } - - const struct xfs_buf_ops xfs_agfl_buf_ops = { -@@ -2218,19 +2214,17 @@ xfs_agf_read_verify( - struct xfs_buf *bp) - { - struct xfs_mount *mp = bp->b_target->bt_mount; -- int agf_ok = 1; -- -- if (xfs_sb_version_hascrc(&mp->m_sb)) -- agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- offsetof(struct xfs_agf, agf_crc)); - -- agf_ok = agf_ok && xfs_agf_verify(mp, bp); -- -- if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, -- XFS_RANDOM_ALLOC_READ_AGF))) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -+ if (xfs_sb_version_hascrc(&mp->m_sb) && -+ !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, -+ XFS_ERRTAG_ALLOC_READ_AGF, -+ XFS_RANDOM_ALLOC_READ_AGF)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -- } -+ -+ if (bp->b_error) -+ xfs_verifier_error(bp); - } - - static void -@@ -2241,8 +2235,8 @@ xfs_agf_write_verify( - struct xfs_buf_log_item *bip = bp->b_fspriv; - - if (!xfs_agf_verify(mp, bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - -@@ -2252,8 +2246,7 @@ xfs_agf_write_verify( - if (bip) - XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); - -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -- offsetof(struct xfs_agf, agf_crc)); -+ xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); - } - - const struct xfs_buf_ops xfs_agf_buf_ops = { -diff --git a/libxfs/xfs_alloc_btree.c b/libxfs/xfs_alloc_btree.c -index 282a320..215be7e 100644 ---- a/libxfs/xfs_alloc_btree.c -+++ b/libxfs/xfs_alloc_btree.c -@@ -337,12 +337,14 @@ static void - xfs_allocbt_read_verify( - struct xfs_buf *bp) - { -- if (!(xfs_btree_sblock_verify_crc(bp) && -- xfs_allocbt_verify(bp))) { -- trace_xfs_btree_corrupt(bp, _RET_IP_); -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -- bp->b_target->bt_mount, bp->b_addr); -+ if 
(!xfs_btree_sblock_verify_crc(bp)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_allocbt_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ -+ if (bp->b_error) { -+ trace_xfs_btree_corrupt(bp, _RET_IP_); -+ xfs_verifier_error(bp); - } - } - -@@ -352,9 +354,9 @@ xfs_allocbt_write_verify( - { - if (!xfs_allocbt_verify(bp)) { - trace_xfs_btree_corrupt(bp, _RET_IP_); -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -- bp->b_target->bt_mount, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); -+ return; - } - xfs_btree_sblock_calc_crc(bp); - -diff --git a/libxfs/xfs_attr_leaf.c b/libxfs/xfs_attr_leaf.c -index fd52397..f7f02ae 100644 ---- a/libxfs/xfs_attr_leaf.c -+++ b/libxfs/xfs_attr_leaf.c -@@ -187,8 +187,8 @@ xfs_attr3_leaf_write_verify( - struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; - - if (!xfs_attr3_leaf_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - -@@ -198,7 +198,7 @@ xfs_attr3_leaf_write_verify( - if (bip) - hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF); -+ xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF); - } - - /* -@@ -213,13 +213,14 @@ xfs_attr3_leaf_read_verify( - { - struct xfs_mount *mp = bp->b_target->bt_mount; - -- if ((xfs_sb_version_hascrc(&mp->m_sb) && -- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- XFS_ATTR3_LEAF_CRC_OFF)) || -- !xfs_attr3_leaf_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -+ if (xfs_sb_version_hascrc(&mp->m_sb) && -+ !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_attr3_leaf_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -- } -+ -+ if (bp->b_error) -+ xfs_verifier_error(bp); - } - - const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { -diff --git a/libxfs/xfs_attr_remote.c 
b/libxfs/xfs_attr_remote.c -index 59bb12d..5cf5c73 100644 ---- a/libxfs/xfs_attr_remote.c -+++ b/libxfs/xfs_attr_remote.c -@@ -100,7 +100,6 @@ xfs_attr3_rmt_read_verify( - struct xfs_mount *mp = bp->b_target->bt_mount; - char *ptr; - int len; -- bool corrupt = false; - xfs_daddr_t bno; - - /* no verification of non-crc buffers */ -@@ -115,11 +114,11 @@ xfs_attr3_rmt_read_verify( - while (len > 0) { - if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), - XFS_ATTR3_RMT_CRC_OFF)) { -- corrupt = true; -+ xfs_buf_ioerror(bp, EFSBADCRC); - break; - } - if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { -- corrupt = true; -+ xfs_buf_ioerror(bp, EFSCORRUPTED); - break; - } - len -= XFS_LBSIZE(mp); -@@ -127,10 +126,9 @@ xfs_attr3_rmt_read_verify( - bno += mp->m_bsize; - } - -- if (corrupt) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -- xfs_buf_ioerror(bp, EFSCORRUPTED); -- } else -+ if (bp->b_error) -+ xfs_verifier_error(bp); -+ else - ASSERT(len == 0); - } - -@@ -155,9 +153,8 @@ xfs_attr3_rmt_write_verify( - - while (len > 0) { - if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { -- XFS_CORRUPTION_ERROR(__func__, -- XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - if (bip) { -diff --git a/libxfs/xfs_bmap_btree.c b/libxfs/xfs_bmap_btree.c -index 3136e4f..a4bd69d 100644 ---- a/libxfs/xfs_bmap_btree.c -+++ b/libxfs/xfs_bmap_btree.c -@@ -759,12 +759,14 @@ static void - xfs_bmbt_read_verify( - struct xfs_buf *bp) - { -- if (!(xfs_btree_lblock_verify_crc(bp) && -- xfs_bmbt_verify(bp))) { -- trace_xfs_btree_corrupt(bp, _RET_IP_); -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -- bp->b_target->bt_mount, bp->b_addr); -+ if (!xfs_btree_lblock_verify_crc(bp)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_bmbt_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ -+ if (bp->b_error) { -+ trace_xfs_btree_corrupt(bp, _RET_IP_); -+ xfs_verifier_error(bp); - } - } - -@@ -773,11 
+775,9 @@ xfs_bmbt_write_verify( - struct xfs_buf *bp) - { - if (!xfs_bmbt_verify(bp)) { -- xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn); - trace_xfs_btree_corrupt(bp, _RET_IP_); -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -- bp->b_target->bt_mount, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - xfs_btree_lblock_calc_crc(bp); -diff --git a/libxfs/xfs_btree.c b/libxfs/xfs_btree.c -index 2dd6fb7..9be4abd 100644 ---- a/libxfs/xfs_btree.c -+++ b/libxfs/xfs_btree.c -@@ -218,8 +218,7 @@ xfs_btree_lblock_calc_crc( - return; - if (bip) - block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -- XFS_BTREE_LBLOCK_CRC_OFF); -+ xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); - } - - bool -@@ -227,8 +226,8 @@ xfs_btree_lblock_verify_crc( - struct xfs_buf *bp) - { - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) -- return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- XFS_BTREE_LBLOCK_CRC_OFF); -+ return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); -+ - return true; - } - -@@ -251,8 +250,7 @@ xfs_btree_sblock_calc_crc( - return; - if (bip) - block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -- XFS_BTREE_SBLOCK_CRC_OFF); -+ xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); - } - - bool -@@ -260,8 +258,8 @@ xfs_btree_sblock_verify_crc( - struct xfs_buf *bp) - { - if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) -- return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- XFS_BTREE_SBLOCK_CRC_OFF); -+ return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); -+ - return true; - } - -diff --git a/libxfs/xfs_da_btree.c b/libxfs/xfs_da_btree.c -index 53414f5..154adb1 100644 ---- a/libxfs/xfs_da_btree.c -+++ b/libxfs/xfs_da_btree.c -@@ -209,8 +209,8 @@ xfs_da3_node_write_verify( - struct xfs_da3_node_hdr *hdr3 = bp->b_addr; - - if 
(!xfs_da3_node_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - -@@ -220,7 +220,7 @@ xfs_da3_node_write_verify( - if (bip) - hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF); -+ xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); - } - - /* -@@ -233,18 +233,20 @@ static void - xfs_da3_node_read_verify( - struct xfs_buf *bp) - { -- struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_da_blkinfo *info = bp->b_addr; - - switch (be16_to_cpu(info->magic)) { - case XFS_DA3_NODE_MAGIC: -- if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- XFS_DA3_NODE_CRC_OFF)) -+ if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { -+ xfs_buf_ioerror(bp, EFSBADCRC); - break; -+ } - /* fall through */ - case XFS_DA_NODE_MAGIC: -- if (!xfs_da3_node_verify(bp)) -+ if (!xfs_da3_node_verify(bp)) { -+ xfs_buf_ioerror(bp, EFSCORRUPTED); - break; -+ } - return; - case XFS_ATTR_LEAF_MAGIC: - case XFS_ATTR3_LEAF_MAGIC: -@@ -261,8 +263,7 @@ xfs_da3_node_read_verify( - } - - /* corrupt block */ -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -- xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - } - - const struct xfs_buf_ops xfs_da3_node_buf_ops = { -diff --git a/libxfs/xfs_dir2.c b/libxfs/xfs_dir2.c -index 96a3c1d..4c8c836 100644 ---- a/libxfs/xfs_dir2.c -+++ b/libxfs/xfs_dir2.c -@@ -20,6 +20,22 @@ - - struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; - -+/* -+ * @mode, if set, indicates that the type field needs to be set up. -+ * This uses the transformation from file mode to DT_* as defined in linux/fs.h -+ * for file type specification. This will be propagated into the directory -+ * structure if appropriate for the given operation and filesystem config. 
-+ */ -+const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = { -+ [0] = XFS_DIR3_FT_UNKNOWN, -+ [S_IFREG >> S_SHIFT] = XFS_DIR3_FT_REG_FILE, -+ [S_IFDIR >> S_SHIFT] = XFS_DIR3_FT_DIR, -+ [S_IFCHR >> S_SHIFT] = XFS_DIR3_FT_CHRDEV, -+ [S_IFBLK >> S_SHIFT] = XFS_DIR3_FT_BLKDEV, -+ [S_IFIFO >> S_SHIFT] = XFS_DIR3_FT_FIFO, -+ [S_IFSOCK >> S_SHIFT] = XFS_DIR3_FT_SOCK, -+ [S_IFLNK >> S_SHIFT] = XFS_DIR3_FT_SYMLINK, -+}; - - /* - * ASCII case-insensitive (ie. A-Z) support for directories that was -diff --git a/libxfs/xfs_dir2_block.c b/libxfs/xfs_dir2_block.c -index 1d8f598..cede01f 100644 ---- a/libxfs/xfs_dir2_block.c -+++ b/libxfs/xfs_dir2_block.c -@@ -70,13 +70,14 @@ xfs_dir3_block_read_verify( - { - struct xfs_mount *mp = bp->b_target->bt_mount; - -- if ((xfs_sb_version_hascrc(&mp->m_sb) && -- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- XFS_DIR3_DATA_CRC_OFF)) || -- !xfs_dir3_block_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -+ if (xfs_sb_version_hascrc(&mp->m_sb) && -+ !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_dir3_block_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -- } -+ -+ if (bp->b_error) -+ xfs_verifier_error(bp); - } - - static void -@@ -88,8 +89,8 @@ xfs_dir3_block_write_verify( - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - if (!xfs_dir3_block_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - -@@ -99,7 +100,7 @@ xfs_dir3_block_write_verify( - if (bip) - hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); -+ xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); - } - - const struct xfs_buf_ops xfs_dir3_block_buf_ops = { -diff --git a/libxfs/xfs_dir2_data.c b/libxfs/xfs_dir2_data.c -index 189699f..dc9df4d 100644 ---- a/libxfs/xfs_dir2_data.c -+++ 
b/libxfs/xfs_dir2_data.c -@@ -208,7 +208,6 @@ static void - xfs_dir3_data_reada_verify( - struct xfs_buf *bp) - { -- struct xfs_mount *mp = bp->b_target->bt_mount; - struct xfs_dir2_data_hdr *hdr = bp->b_addr; - - switch (hdr->magic) { -@@ -222,8 +221,8 @@ xfs_dir3_data_reada_verify( - xfs_dir3_data_verify(bp); - return; - default: -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - break; - } - } -@@ -234,13 +233,14 @@ xfs_dir3_data_read_verify( - { - struct xfs_mount *mp = bp->b_target->bt_mount; - -- if ((xfs_sb_version_hascrc(&mp->m_sb) && -- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- XFS_DIR3_DATA_CRC_OFF)) || -- !xfs_dir3_data_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -+ if (xfs_sb_version_hascrc(&mp->m_sb) && -+ !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_dir3_data_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -- } -+ -+ if (bp->b_error) -+ xfs_verifier_error(bp); - } - - static void -@@ -252,8 +252,8 @@ xfs_dir3_data_write_verify( - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - if (!xfs_dir3_data_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - -@@ -263,7 +263,7 @@ xfs_dir3_data_write_verify( - if (bip) - hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); -+ xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); - } - - const struct xfs_buf_ops xfs_dir3_data_buf_ops = { -diff --git a/libxfs/xfs_dir2_leaf.c b/libxfs/xfs_dir2_leaf.c -index 683536e..8e0cbc9 100644 ---- a/libxfs/xfs_dir2_leaf.c -+++ b/libxfs/xfs_dir2_leaf.c -@@ -206,13 +206,14 @@ __read_verify( - { - struct xfs_mount *mp = bp->b_target->bt_mount; - -- if ((xfs_sb_version_hascrc(&mp->m_sb) && -- !xfs_verify_cksum(bp->b_addr, 
BBTOB(bp->b_length), -- XFS_DIR3_LEAF_CRC_OFF)) || -- !xfs_dir3_leaf_verify(bp, magic)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -+ if (xfs_sb_version_hascrc(&mp->m_sb) && -+ !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_dir3_leaf_verify(bp, magic)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -- } -+ -+ if (bp->b_error) -+ xfs_verifier_error(bp); - } - - static void -@@ -225,8 +226,8 @@ __write_verify( - struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; - - if (!xfs_dir3_leaf_verify(bp, magic)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - -@@ -236,7 +237,7 @@ __write_verify( - if (bip) - hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); - -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF); -+ xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); - } - - static void -diff --git a/libxfs/xfs_dir2_node.c b/libxfs/xfs_dir2_node.c -index ced8c58..3256756 100644 ---- a/libxfs/xfs_dir2_node.c -+++ b/libxfs/xfs_dir2_node.c -@@ -98,13 +98,14 @@ xfs_dir3_free_read_verify( - { - struct xfs_mount *mp = bp->b_target->bt_mount; - -- if ((xfs_sb_version_hascrc(&mp->m_sb) && -- !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- XFS_DIR3_FREE_CRC_OFF)) || -- !xfs_dir3_free_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -+ if (xfs_sb_version_hascrc(&mp->m_sb) && -+ !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_dir3_free_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -- } -+ -+ if (bp->b_error) -+ xfs_verifier_error(bp); - } - - static void -@@ -116,8 +117,8 @@ xfs_dir3_free_write_verify( - struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; - - if (!xfs_dir3_free_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ 
xfs_verifier_error(bp); - return; - } - -@@ -127,7 +128,7 @@ xfs_dir3_free_write_verify( - if (bip) - hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); - -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF); -+ xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF); - } - - const struct xfs_buf_ops xfs_dir3_free_buf_ops = { -diff --git a/libxfs/xfs_dquot_buf.c b/libxfs/xfs_dquot_buf.c -index 6bbb0ff..e089ec8 100644 ---- a/libxfs/xfs_dquot_buf.c -+++ b/libxfs/xfs_dquot_buf.c -@@ -237,10 +237,13 @@ xfs_dquot_buf_read_verify( - { - struct xfs_mount *mp = bp->b_target->bt_mount; - -- if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -+ if (!xfs_dquot_buf_verify_crc(mp, bp)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_dquot_buf_verify(mp, bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -- } -+ -+ if (bp->b_error) -+ xfs_verifier_error(bp); - } - - /* -@@ -255,8 +258,8 @@ xfs_dquot_buf_write_verify( - struct xfs_mount *mp = bp->b_target->bt_mount; - - if (!xfs_dquot_buf_verify(mp, bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - } -diff --git a/libxfs/xfs_ialloc.c b/libxfs/xfs_ialloc.c -index afe1a82..c19d84a 100644 ---- a/libxfs/xfs_ialloc.c -+++ b/libxfs/xfs_ialloc.c -@@ -1551,18 +1551,17 @@ xfs_agi_read_verify( - struct xfs_buf *bp) - { - struct xfs_mount *mp = bp->b_target->bt_mount; -- int agi_ok = 1; - -- if (xfs_sb_version_hascrc(&mp->m_sb)) -- agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- offsetof(struct xfs_agi, agi_crc)); -- agi_ok = agi_ok && xfs_agi_verify(bp); -- -- if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, -- XFS_RANDOM_IALLOC_READ_AGI))) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -+ if (xfs_sb_version_hascrc(&mp->m_sb) && -+ !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) -+ 
xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, -+ XFS_ERRTAG_IALLOC_READ_AGI, -+ XFS_RANDOM_IALLOC_READ_AGI)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -- } -+ -+ if (bp->b_error) -+ xfs_verifier_error(bp); - } - - static void -@@ -1573,8 +1572,8 @@ xfs_agi_write_verify( - struct xfs_buf_log_item *bip = bp->b_fspriv; - - if (!xfs_agi_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - -@@ -1583,8 +1582,7 @@ xfs_agi_write_verify( - - if (bip) - XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -- offsetof(struct xfs_agi, agi_crc)); -+ xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); - } - - const struct xfs_buf_ops xfs_agi_buf_ops = { -diff --git a/libxfs/xfs_ialloc_btree.c b/libxfs/xfs_ialloc_btree.c -index 27a5dd9..0a29d73 100644 ---- a/libxfs/xfs_ialloc_btree.c -+++ b/libxfs/xfs_ialloc_btree.c -@@ -224,12 +224,14 @@ static void - xfs_inobt_read_verify( - struct xfs_buf *bp) - { -- if (!(xfs_btree_sblock_verify_crc(bp) && -- xfs_inobt_verify(bp))) { -- trace_xfs_btree_corrupt(bp, _RET_IP_); -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -- bp->b_target->bt_mount, bp->b_addr); -+ if (!xfs_btree_sblock_verify_crc(bp)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_inobt_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ -+ if (bp->b_error) { -+ trace_xfs_btree_corrupt(bp, _RET_IP_); -+ xfs_verifier_error(bp); - } - } - -@@ -239,9 +241,9 @@ xfs_inobt_write_verify( - { - if (!xfs_inobt_verify(bp)) { - trace_xfs_btree_corrupt(bp, _RET_IP_); -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -- bp->b_target->bt_mount, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); -+ return; - } - xfs_btree_sblock_calc_crc(bp); - -diff --git a/libxfs/xfs_inode_buf.c b/libxfs/xfs_inode_buf.c -index d245d72..de16ed9 100644 ---- 
a/libxfs/xfs_inode_buf.c -+++ b/libxfs/xfs_inode_buf.c -@@ -88,8 +88,7 @@ xfs_inode_buf_verify( - } - - xfs_buf_ioerror(bp, EFSCORRUPTED); -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, -- mp, dip); -+ xfs_verifier_error(bp); - #ifdef DEBUG - xfs_alert(mp, - "bad inode magic/vsn daddr %lld #%d (magic=%x)", -@@ -292,7 +291,7 @@ xfs_dinode_verify( - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return false; - if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, -- offsetof(struct xfs_dinode, di_crc))) -+ XFS_DINODE_CRC_OFF)) - return false; - if (be64_to_cpu(dip->di_ino) != ino) - return false; -@@ -313,7 +312,7 @@ xfs_dinode_calc_crc( - - ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); - crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, -- offsetof(struct xfs_dinode, di_crc)); -+ XFS_DINODE_CRC_OFF); - dip->di_crc = xfs_end_cksum(crc); - } - -diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c -index 48b1a97..7ee4612 100644 ---- a/libxfs/xfs_sb.c -+++ b/libxfs/xfs_sb.c -@@ -258,6 +258,7 @@ xfs_mount_validate_sb( - sbp->sb_inodelog < XFS_DINODE_MIN_LOG || - sbp->sb_inodelog > XFS_DINODE_MAX_LOG || - sbp->sb_inodesize != (1 << sbp->sb_inodelog) || -+ sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || - (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || - (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || - (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || -@@ -265,8 +266,7 @@ xfs_mount_validate_sb( - sbp->sb_dblocks == 0 || - sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || - sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { -- XFS_CORRUPTION_ERROR("SB sanity check failed", -- XFS_ERRLEVEL_LOW, mp, sbp); -+ xfs_notice(mp, "SB sanity check failed"); - return XFS_ERROR(EFSCORRUPTED); - } - -@@ -542,6 +542,11 @@ xfs_sb_verify( - * single bit error could clear the feature bit and unused parts of the - * superblock are supposed to be zero. 
Hence a non-null crc field indicates that - * we've potentially lost a feature bit and we should check it anyway. -+ * -+ * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the -+ * last field in V4 secondary superblocks. So for secondary superblocks, -+ * we are more forgiving, and ignore CRC failures if the primary doesn't -+ * indicate that the fs version is V5. - */ - static void - xfs_sb_read_verify( -@@ -560,20 +565,22 @@ xfs_sb_read_verify( - XFS_SB_VERSION_5) || - dsb->sb_crc != 0)) { - -- if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize), -- offsetof(struct xfs_sb, sb_crc))) { -- error = EFSCORRUPTED; -- goto out_error; -+ if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { -+ /* Only fail bad secondaries on a known V5 filesystem */ -+ if (bp->b_bn == XFS_SB_DADDR || -+ xfs_sb_version_hascrc(&mp->m_sb)) { -+ error = EFSBADCRC; -+ goto out_error; -+ } - } - } - error = xfs_sb_verify(bp, true); - - out_error: - if (error) { -- if (error != EWRONGFS) -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -- mp, bp->b_addr); - xfs_buf_ioerror(bp, error); -+ if (error == EFSCORRUPTED || error == EFSBADCRC) -+ xfs_verifier_error(bp); - } - } - -@@ -589,7 +596,6 @@ xfs_sb_quiet_read_verify( - { - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); - -- - if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { - /* XFS filesystem, verify noisily! 
*/ - xfs_sb_read_verify(bp); -@@ -609,9 +615,8 @@ xfs_sb_write_verify( - - error = xfs_sb_verify(bp, false); - if (error) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -- mp, bp->b_addr); - xfs_buf_ioerror(bp, error); -+ xfs_verifier_error(bp); - return; - } - -@@ -621,8 +626,7 @@ xfs_sb_write_verify( - if (bip) - XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); - -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -- offsetof(struct xfs_sb, sb_crc)); -+ xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); - } - - const struct xfs_buf_ops xfs_sb_buf_ops = { -diff --git a/libxfs/xfs_symlink_remote.c b/libxfs/xfs_symlink_remote.c -index 539db0c..ebf60ac 100644 ---- a/libxfs/xfs_symlink_remote.c -+++ b/libxfs/xfs_symlink_remote.c -@@ -116,12 +116,13 @@ xfs_symlink_read_verify( - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - -- if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -- offsetof(struct xfs_dsymlink_hdr, sl_crc)) || -- !xfs_symlink_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -+ if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) -+ xfs_buf_ioerror(bp, EFSBADCRC); -+ else if (!xfs_symlink_verify(bp)) - xfs_buf_ioerror(bp, EFSCORRUPTED); -- } -+ -+ if (bp->b_error) -+ xfs_verifier_error(bp); - } - - static void -@@ -136,8 +137,8 @@ xfs_symlink_write_verify( - return; - - if (!xfs_symlink_verify(bp)) { -- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); - xfs_buf_ioerror(bp, EFSCORRUPTED); -+ xfs_verifier_error(bp); - return; - } - -@@ -145,8 +146,7 @@ xfs_symlink_write_verify( - struct xfs_dsymlink_hdr *dsl = bp->b_addr; - dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); - } -- xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -- offsetof(struct xfs_dsymlink_hdr, sl_crc)); -+ xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF); - } - - const struct xfs_buf_ops xfs_symlink_buf_ops = { -diff --git a/logprint/log_misc.c b/logprint/log_misc.c -index cf9d59d..52f1e85 100644 ---- 
a/logprint/log_misc.c -+++ b/logprint/log_misc.c -@@ -810,7 +810,6 @@ xlog_print_trans_icreate( - - memmove(&icl_buf, *ptr, MIN(sizeof(struct xfs_icreate_log), len)); - icl = &icl_buf; -- (*i)++; - *ptr += len; - - /* handle complete header only */ -@@ -874,7 +873,7 @@ xlog_print_record( - int bad_hdr_warn) - { - xfs_caddr_t buf, ptr; -- int read_len, skip; -+ int read_len, skip, lost_context = 0; - int ret, n, i, j, k; - - if (print_no_print) -@@ -995,7 +994,10 @@ xlog_print_record( - if (xlog_print_find_tid(be32_to_cpu(op_head->oh_tid), - op_head->oh_flags & XLOG_WAS_CONT_TRANS)) { - printf(_("Left over region from split log item\n")); -+ /* Skip this leftover bit */ - ptr += be32_to_cpu(op_head->oh_len); -+ /* We've lost context; don't complain if next one looks bad too */ -+ lost_context = 1; - continue; - } - -@@ -1050,7 +1052,7 @@ xlog_print_record( - break; - } - default: { -- if (bad_hdr_warn) { -+ if (bad_hdr_warn && !lost_context) { - fprintf(stderr, - _("%s: unknown log operation type (%x)\n"), - progname, *(unsigned short *)ptr); -@@ -1064,6 +1066,7 @@ xlog_print_record( - } - skip = 0; - ptr += be32_to_cpu(op_head->oh_len); -+ lost_context = 0; - } - } /* switch */ - } /* else */ -diff --git a/man/man8/xfs_io.8 b/man/man8/xfs_io.8 -index 767b50e..0dec0b7 100644 ---- a/man/man8/xfs_io.8 -+++ b/man/man8/xfs_io.8 -@@ -4,7 +4,7 @@ xfs_io \- debug the I/O path of an XFS filesystem - .SH SYNOPSIS - .B xfs_io - [ --.B \-adfmrRstx -+.B \-adfmrRstxT - ] [ - .B \-c - .I cmd -@@ -88,7 +88,7 @@ command for more details on any command. - Display a list of all open files and (optionally) switch to an alternate - current open file. - .TP --.BI "open [[ \-acdfrstR ] " path " ]" -+.BI "open [[ \-acdfrstRT ] " path " ]" - Closes the current file, and opens the file specified by - .I path - instead. Without any arguments, displays statistics about the current -@@ -119,6 +119,14 @@ truncates on open (O_TRUNC). 
- .B \-n - opens in non-blocking mode if possible (O_NONBLOCK). - .TP -+.B \-T -+create a temporary file not linked into the filesystem namespace -+(O_TMPFILE). The pathname passed must refer to a directory which -+is treated as virtual parent for the newly created invisible file. -+Can not be used together with the -+.B \-r -+option. -+.TP - .B \-R - marks the file as a realtime XFS file after - opening it, if it is not already marked as such. -@@ -380,12 +388,23 @@ will set the FALLOC_FL_KEEP_SIZE flag as described in - .PD - .RE - .TP -+.BI fcollapse " offset length" -+Call fallocate with FALLOC_FL_COLLAPSE_RANGE flag as described in the -+.BR fallocate (2) -+manual page to de-allocates blocks and eliminates the hole created in this process -+by shifting data blocks into the hole. -+.TP - .BI fpunch " offset length" - Punches (de-allocates) blocks in the file by calling fallocate with - the FALLOC_FL_PUNCH_HOLE flag as described in the - .BR fallocate (2) - manual page. - .TP -+.BI fzero " offset length" -+Call fallocate with FALLOC_FL_ZERO_RANGE flag as described in the -+.BR fallocate (2) -+manual page to allocate and zero blocks within the range. -+.TP - .BI truncate " offset" - Truncates the current file at the given offset using - .BR ftruncate (2). -diff --git a/man/man8/xfs_metadump.8 b/man/man8/xfs_metadump.8 -index 4fa1b1c..077fff5 100644 ---- a/man/man8/xfs_metadump.8 -+++ b/man/man8/xfs_metadump.8 -@@ -4,7 +4,7 @@ xfs_metadump \- copy XFS filesystem metadata to a file - .SH SYNOPSIS - .B xfs_metadump - [ --.B \-efgow -+.B \-efFgow - ] [ - .B \-m - .I max_extents -@@ -86,6 +86,11 @@ file option). This can also happen if an image copy of a filesystem has - been made into an ordinary file with - .BR xfs_copy (8). - .TP -+.B \-F -+Specifies that we want to continue even if the superblock magic is not correct. -+If the source is truly not an XFS filesystem, the resulting image will be useless, -+and xfs_metadump may crash. 
-+.TP - .B \-g - Shows dump progress. This is sent to stdout if the - .I target -diff --git a/man/man8/xfs_repair.8 b/man/man8/xfs_repair.8 -index ed20fb7..b7c2d8c 100644 ---- a/man/man8/xfs_repair.8 -+++ b/man/man8/xfs_repair.8 -@@ -144,7 +144,7 @@ reduce repair times on concat based filesystems. - .BI force_geometry - Check the filesystem even if geometry information could not be validated. - Geometry information can not be validated if only a single allocation --group and exist and thus we do not have a backup superblock available, or -+group exists and thus we do not have a backup superblock available, or - if there are two allocation groups and the two superblocks do not - agree on the filesystem geometry. Only use this option if you validated - the geometry yourself and know what you are doing. If In doubt run -diff --git a/mkfs/proto.c b/mkfs/proto.c -index 4cc0df6..4d3680d 100644 ---- a/mkfs/proto.c -+++ b/mkfs/proto.c -@@ -438,6 +438,7 @@ parseproto( - creds.cr_gid = (int)getnum(pp); - xname.name = (uchar_t *)name; - xname.len = name ? 
strlen(name) : 0; -+ xname.type = 0; - tp = libxfs_trans_alloc(mp, 0); - flags = XFS_ILOG_CORE; - xfs_bmap_init(&flist, &first); -@@ -453,6 +454,7 @@ parseproto( - if (buf) - free(buf); - libxfs_trans_ijoin(tp, pip, 0); -+ xname.type = XFS_DIR3_FT_REG_FILE; - newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); - libxfs_trans_ihold(tp, pip); - break; -@@ -469,6 +471,7 @@ parseproto( - - libxfs_trans_ijoin(tp, pip, 0); - -+ xname.type = XFS_DIR3_FT_REG_FILE; - newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); - libxfs_trans_ihold(tp, pip); - libxfs_trans_log_inode(tp, ip, flags); -@@ -490,6 +493,7 @@ parseproto( - fail(_("Inode allocation failed"), error); - } - libxfs_trans_ijoin(tp, pip, 0); -+ xname.type = XFS_DIR3_FT_BLKDEV; - newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); - libxfs_trans_ihold(tp, pip); - flags |= XFS_ILOG_DEV; -@@ -504,6 +508,7 @@ parseproto( - if (error) - fail(_("Inode allocation failed"), error); - libxfs_trans_ijoin(tp, pip, 0); -+ xname.type = XFS_DIR3_FT_CHRDEV; - newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); - libxfs_trans_ihold(tp, pip); - flags |= XFS_ILOG_DEV; -@@ -516,6 +521,7 @@ parseproto( - if (error) - fail(_("Inode allocation failed"), error); - libxfs_trans_ijoin(tp, pip, 0); -+ xname.type = XFS_DIR3_FT_FIFO; - newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); - libxfs_trans_ihold(tp, pip); - break; -@@ -529,6 +535,7 @@ parseproto( - fail(_("Inode allocation failed"), error); - flags |= newfile(tp, ip, &flist, &first, 1, 1, buf, len); - libxfs_trans_ijoin(tp, pip, 0); -+ xname.type = XFS_DIR3_FT_SYMLINK; - newdirent(mp, tp, pip, &xname, ip->i_ino, &first, &flist); - libxfs_trans_ihold(tp, pip); - break; -@@ -546,6 +553,7 @@ parseproto( - isroot = 1; - } else { - libxfs_trans_ijoin(tp, pip, 0); -+ xname.type = XFS_DIR3_FT_DIR; - newdirent(mp, tp, pip, &xname, ip->i_ino, - &first, &flist); - pip->i_d.di_nlink++; -diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c -index 
d82128c..f7cf394 100644 ---- a/mkfs/xfs_mkfs.c -+++ b/mkfs/xfs_mkfs.c -@@ -2366,32 +2366,40 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"), - } else if (!loginternal && !xi.logdev) { - logblocks = 0; - } else if (loginternal && !logsize) { -- /* -- * With a 2GB max log size, default to maximum size -- * at 4TB. This keeps the same ratio from the older -- * max log size of 128M at 256GB fs size. IOWs, -- * the ratio of fs size to log size is 2048:1. -- */ -- logblocks = (dblocks << blocklog) / 2048; -- logblocks = logblocks >> blocklog; -- logblocks = MAX(min_logblocks, logblocks); - -- /* -- * If the default log size doesn't fit in the AG size, use the -- * minimum log size instead. This ensures small filesystems -- * don't use excessive amounts of space for the log. -- */ -- if (min_logblocks * XFS_DFL_LOG_FACTOR >= agsize) { -+ if (dblocks < GIGABYTES(1, blocklog)) { -+ /* tiny filesystems get minimum sized logs. */ - logblocks = min_logblocks; -+ } else if (dblocks < GIGABYTES(16, blocklog)) { -+ -+ /* -+ * For small filesystems, we want to use the -+ * XFS_MIN_LOG_BYTES for filesystems smaller than 16G if -+ * at all possible, ramping up to 128MB at 256GB. -+ */ -+ logblocks = MIN(XFS_MIN_LOG_BYTES >> blocklog, -+ min_logblocks * XFS_DFL_LOG_FACTOR); - } else { -- logblocks = MAX(logblocks, -- MAX(XFS_DFL_LOG_SIZE, -- min_logblocks * XFS_DFL_LOG_FACTOR)); -+ /* -+ * With a 2GB max log size, default to maximum size -+ * at 4TB. This keeps the same ratio from the older -+ * max log size of 128M at 256GB fs size. IOWs, -+ * the ratio of fs size to log size is 2048:1. 
-+ */ -+ logblocks = (dblocks << blocklog) / 2048; -+ logblocks = logblocks >> blocklog; -+ logblocks = MAX(min_logblocks, logblocks); - } -+ -+ /* make sure the log fits wholly within an AG */ -+ if (logblocks >= agsize) -+ logblocks = min_logblocks; -+ -+ /* and now clamp the size to the maximum supported size */ - logblocks = MIN(logblocks, XFS_MAX_LOG_BLOCKS); -- if ((logblocks << blocklog) > XFS_MAX_LOG_BYTES) { -+ if ((logblocks << blocklog) > XFS_MAX_LOG_BYTES) - logblocks = XFS_MAX_LOG_BYTES >> blocklog; -- } -+ - } - validate_log_size(logblocks, blocklog, min_logblocks); - -diff --git a/repair/agheader.c b/repair/agheader.c -index 53e47b6..fc5dac9 100644 ---- a/repair/agheader.c -+++ b/repair/agheader.c -@@ -472,7 +472,7 @@ verify_set_agheader(xfs_mount_t *mp, xfs_buf_t *sbuf, xfs_sb_t *sb, - int status = XR_OK; - int status_sb = XR_OK; - -- status = verify_sb(sb, (i == 0)); -+ status = verify_sb(sbuf->b_addr, sb, (i == 0)); - - if (status != XR_OK) { - do_warn(_("bad on-disk superblock %d - %s\n"), -diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c -index d3c2236..afb26e0 100644 ---- a/repair/dino_chunks.c -+++ b/repair/dino_chunks.c -@@ -141,7 +141,7 @@ verify_inode_chunk(xfs_mount_t *mp, - if (check_aginode_block(mp, agno, agino) == 0) - return 0; - -- pthread_mutex_lock(&ag_locks[agno]); -+ pthread_mutex_lock(&ag_locks[agno].lock); - - state = get_bmap(agno, agbno); - switch (state) { -@@ -166,7 +166,7 @@ verify_inode_chunk(xfs_mount_t *mp, - _("inode block %d/%d multiply claimed, (state %d)\n"), - agno, agbno, state); - set_bmap(agno, agbno, XR_E_MULT); -- pthread_mutex_unlock(&ag_locks[agno]); -+ pthread_mutex_unlock(&ag_locks[agno].lock); - return(0); - default: - do_warn( -@@ -176,7 +176,7 @@ verify_inode_chunk(xfs_mount_t *mp, - break; - } - -- pthread_mutex_unlock(&ag_locks[agno]); -+ pthread_mutex_unlock(&ag_locks[agno].lock); - - start_agino = XFS_OFFBNO_TO_AGINO(mp, agbno, 0); - *start_ino = XFS_AGINO_TO_INO(mp, agno, start_agino); -@@ 
-424,7 +424,7 @@ verify_inode_chunk(xfs_mount_t *mp, - * user data -- we're probably here as a result of a directory - * entry or an iunlinked pointer - */ -- pthread_mutex_lock(&ag_locks[agno]); -+ pthread_mutex_lock(&ag_locks[agno].lock); - for (cur_agbno = chunk_start_agbno; - cur_agbno < chunk_stop_agbno; - cur_agbno += blen) { -@@ -438,7 +438,7 @@ verify_inode_chunk(xfs_mount_t *mp, - _("inode block %d/%d multiply claimed, (state %d)\n"), - agno, cur_agbno, state); - set_bmap_ext(agno, cur_agbno, blen, XR_E_MULT); -- pthread_mutex_unlock(&ag_locks[agno]); -+ pthread_mutex_unlock(&ag_locks[agno].lock); - return 0; - case XR_E_INO: - do_error( -@@ -449,7 +449,7 @@ verify_inode_chunk(xfs_mount_t *mp, - break; - } - } -- pthread_mutex_unlock(&ag_locks[agno]); -+ pthread_mutex_unlock(&ag_locks[agno].lock); - - /* - * ok, chunk is good. put the record into the tree if required, -@@ -472,7 +472,7 @@ verify_inode_chunk(xfs_mount_t *mp, - - set_inode_used(irec_p, agino - start_agino); - -- pthread_mutex_lock(&ag_locks[agno]); -+ pthread_mutex_lock(&ag_locks[agno].lock); - - for (cur_agbno = chunk_start_agbno; - cur_agbno < chunk_stop_agbno; -@@ -505,7 +505,7 @@ verify_inode_chunk(xfs_mount_t *mp, - break; - } - } -- pthread_mutex_unlock(&ag_locks[agno]); -+ pthread_mutex_unlock(&ag_locks[agno].lock); - - return(ino_cnt); - } -@@ -736,7 +736,7 @@ process_inode_chunk( - /* - * mark block as an inode block in the incore bitmap - */ -- pthread_mutex_lock(&ag_locks[agno]); -+ pthread_mutex_lock(&ag_locks[agno].lock); - state = get_bmap(agno, agbno); - switch (state) { - case XR_E_INO: /* already marked */ -@@ -755,7 +755,7 @@ process_inode_chunk( - XFS_AGB_TO_FSB(mp, agno, agbno), state); - break; - } -- pthread_mutex_unlock(&ag_locks[agno]); -+ pthread_mutex_unlock(&ag_locks[agno].lock); - - for (;;) { - /* -@@ -788,6 +788,8 @@ process_inode_chunk( - * we do now, this is where to start. 
- */ - if (is_used) { -+ __uint16_t di_mode; -+ - if (is_inode_free(ino_rec, irec_offset)) { - if (verbose || no_modify) { - do_warn( -@@ -803,6 +805,15 @@ process_inode_chunk( - set_inode_used(ino_rec, irec_offset); - - /* -+ * store the on-disk file type for comparing in -+ * phase 6. -+ */ -+ di_mode = be16_to_cpu(dino->di_mode); -+ di_mode = (di_mode & S_IFMT) >> S_SHIFT; -+ set_inode_ftype(ino_rec, irec_offset, -+ xfs_mode_to_ftype[di_mode]); -+ -+ /* - * store on-disk nlink count for comparing in phase 7 - */ - set_inode_disk_nlinks(ino_rec, irec_offset, -@@ -914,7 +925,7 @@ process_inode_chunk( - ibuf_offset = 0; - agbno++; - -- pthread_mutex_lock(&ag_locks[agno]); -+ pthread_mutex_lock(&ag_locks[agno].lock); - state = get_bmap(agno, agbno); - switch (state) { - case XR_E_INO: /* already marked */ -@@ -935,7 +946,7 @@ process_inode_chunk( - XFS_AGB_TO_FSB(mp, agno, agbno), state); - break; - } -- pthread_mutex_unlock(&ag_locks[agno]); -+ pthread_mutex_unlock(&ag_locks[agno].lock); - - } else if (irec_offset == XFS_INODES_PER_CHUNK) { - /* -diff --git a/repair/dinode.c b/repair/dinode.c -index 7469fc8..48f17ac 100644 ---- a/repair/dinode.c -+++ b/repair/dinode.c -@@ -32,6 +32,37 @@ - #include "threads.h" - - /* -+ * gettext lookups for translations of strings use mutexes internally to -+ * the library. Hence when we come through here doing parallel scans in -+ * multiple AGs, then all do concurrent text conversions and serialise -+ * on the translation string lookups. Let's avoid doing repeated lookups -+ * by making them static variables and only assigning the translation -+ * once. 
-+ */ -+static char *forkname_data; -+static char *forkname_attr; -+static char *ftype_real_time; -+static char *ftype_regular; -+ -+void -+dinode_bmbt_translation_init(void) -+{ -+ forkname_data = _("data"); -+ forkname_attr = _("attr"); -+ ftype_real_time = _("real-time"); -+ ftype_regular = _("regular"); -+} -+ -+char * -+get_forkname(int whichfork) -+{ -+ -+ if (whichfork == XFS_DATA_FORK) -+ return forkname_data; -+ return forkname_attr; -+} -+ -+/* - * inode clearing routines - */ - -@@ -542,7 +573,7 @@ process_bmbt_reclist_int( - xfs_dfiloff_t op = 0; /* prev offset */ - xfs_dfsbno_t b; - char *ftype; -- char *forkname; -+ char *forkname = get_forkname(whichfork); - int i; - int state; - xfs_agnumber_t agno; -@@ -552,15 +583,10 @@ process_bmbt_reclist_int( - xfs_agnumber_t locked_agno = -1; - int error = 1; - -- if (whichfork == XFS_DATA_FORK) -- forkname = _("data"); -- else -- forkname = _("attr"); -- - if (type == XR_INO_RTDATA) -- ftype = _("real-time"); -+ ftype = ftype_real_time; - else -- ftype = _("regular"); -+ ftype = ftype_regular; - - for (i = 0; i < *numrecs; i++) { - libxfs_bmbt_disk_get_all(rp + i, &irec); -@@ -651,9 +677,10 @@ _("inode %" PRIu64 " - extent offset too large - start %" PRIu64 ", " - } - - if (blkmapp && *blkmapp) { -- error = blkmap_set_ext(blkmapp, irec.br_startoff, -+ int error2; -+ error2 = blkmap_set_ext(blkmapp, irec.br_startoff, - irec.br_startblock, irec.br_blockcount); -- if (error) { -+ if (error2) { - /* - * we don't want to clear the inode due to an - * internal bmap tracking error, but if we've -@@ -665,7 +692,7 @@ _("inode %" PRIu64 " - extent offset too large - start %" PRIu64 ", " - do_abort( - _("Fatal error: inode %" PRIu64 " - blkmap_set_ext(): %s\n" - "\t%s fork, off - %" PRIu64 ", start - %" PRIu64 ", cnt %" PRIu64 "\n"), -- ino, strerror(error), forkname, -+ ino, strerror(error2), forkname, - irec.br_startoff, irec.br_startblock, - irec.br_blockcount); - } -@@ -680,8 +707,8 @@ _("Fatal error: inode %" 
PRIu64 " - blkmap_set_ext(): %s\n" - ebno = agbno + irec.br_blockcount; - if (agno != locked_agno) { - if (locked_agno != -1) -- pthread_mutex_unlock(&ag_locks[locked_agno]); -- pthread_mutex_lock(&ag_locks[agno]); -+ pthread_mutex_unlock(&ag_locks[locked_agno].lock); -+ pthread_mutex_lock(&ag_locks[agno].lock); - locked_agno = agno; - } - -@@ -750,7 +777,7 @@ _("illegal state %d in block map %" PRIu64 "\n"), - error = 0; - done: - if (locked_agno != -1) -- pthread_mutex_unlock(&ag_locks[locked_agno]); -+ pthread_mutex_unlock(&ag_locks[locked_agno].lock); - - if (i != *numrecs) { - ASSERT(i < *numrecs); -@@ -1109,7 +1136,7 @@ process_btinode( - xfs_ino_t lino; - xfs_bmbt_ptr_t *pp; - xfs_bmbt_key_t *pkey; -- char *forkname; -+ char *forkname = get_forkname(whichfork); - int i; - int level; - int numrecs; -@@ -1121,11 +1148,6 @@ process_btinode( - *tot = 0; - *nex = 0; - -- if (whichfork == XFS_DATA_FORK) -- forkname = _("data"); -- else -- forkname = _("attr"); -- - magic = xfs_sb_version_hascrc(&mp->m_sb) ? 
XFS_BMAP_CRC_MAGIC - : XFS_BMAP_MAGIC; - -diff --git a/repair/dinode.h b/repair/dinode.h -index d9197c1..5ee51ca 100644 ---- a/repair/dinode.h -+++ b/repair/dinode.h -@@ -18,9 +18,8 @@ - #ifndef _XR_DINODE_H - #define _XR_DINODE_H - --#include "prefetch.h" -- - struct blkmap; -+struct prefetch_args; - - int - verify_agbno(xfs_mount_t *mp, -@@ -103,12 +102,12 @@ int - process_uncertain_aginodes(xfs_mount_t *mp, - xfs_agnumber_t agno); - void --process_aginodes(xfs_mount_t *mp, -- prefetch_args_t *pf_args, -- xfs_agnumber_t agno, -- int check_dirs, -- int check_dups, -- int extra_attr_check); -+process_aginodes(xfs_mount_t *mp, -+ struct prefetch_args *pf_args, -+ xfs_agnumber_t agno, -+ int check_dirs, -+ int check_dups, -+ int extra_attr_check); - - void - check_uncertain_aginodes(xfs_mount_t *mp, -@@ -127,4 +126,7 @@ get_bmapi(xfs_mount_t *mp, - xfs_dfiloff_t bno, - int whichfork ); - -+void dinode_bmbt_translation_init(void); -+char * get_forkname(int whichfork); -+ - #endif /* _XR_DINODE_H */ -diff --git a/repair/dir2.c b/repair/dir2.c -index 3aabcaa..06dc000 100644 ---- a/repair/dir2.c -+++ b/repair/dir2.c -@@ -552,7 +552,7 @@ _("can't read block %u for directory inode %" PRIu64 "\n"), - - newnode = bp->b_addr; - btree = xfs_da3_node_tree_p(newnode); -- xfs_da3_node_hdr_from_disk(&nodehdr, node); -+ xfs_da3_node_hdr_from_disk(&nodehdr, newnode); - /* - * verify magic number and back pointer, sanity-check - * entry count, verify level -diff --git a/repair/globals.h b/repair/globals.h -index aef8b79..f6e0a22 100644 ---- a/repair/globals.h -+++ b/repair/globals.h -@@ -49,7 +49,8 @@ - #define XR_BAD_SB_UNIT 17 /* bad stripe unit */ - #define XR_BAD_SB_WIDTH 18 /* bad stripe width */ - #define XR_BAD_SVN 19 /* bad shared version number */ --#define XR_BAD_ERR_CODE 20 /* Bad error code */ -+#define XR_BAD_CRC 20 /* Bad CRC */ -+#define XR_BAD_ERR_CODE 21 /* Bad error code */ - - /* XFS filesystem (il)legal values */ - -@@ -186,7 +187,10 @@ EXTERN xfs_extlen_t 
sb_inoalignmt; - EXTERN __uint32_t sb_unit; - EXTERN __uint32_t sb_width; - --EXTERN pthread_mutex_t *ag_locks; -+struct aglock { -+ pthread_mutex_t lock __attribute__((__aligned__(64))); -+}; -+EXTERN struct aglock *ag_locks; - - EXTERN int report_interval; - EXTERN __uint64_t *prog_rpt_done; -diff --git a/repair/incore.c b/repair/incore.c -index 3590464..a8d497e 100644 ---- a/repair/incore.c -+++ b/repair/incore.c -@@ -294,13 +294,13 @@ init_bmaps(xfs_mount_t *mp) - if (!ag_bmap) - do_error(_("couldn't allocate block map btree roots\n")); - -- ag_locks = calloc(mp->m_sb.sb_agcount, sizeof(pthread_mutex_t)); -+ ag_locks = calloc(mp->m_sb.sb_agcount, sizeof(struct aglock)); - if (!ag_locks) - do_error(_("couldn't allocate block map locks\n")); - - for (i = 0; i < mp->m_sb.sb_agcount; i++) { - btree_init(&ag_bmap[i]); -- pthread_mutex_init(&ag_locks[i], NULL); -+ pthread_mutex_init(&ag_locks[i].lock, NULL); - } - - init_rt_bmap(mp); -diff --git a/repair/incore.h b/repair/incore.h -index 38caa6d..5419884 100644 ---- a/repair/incore.h -+++ b/repair/incore.h -@@ -293,6 +293,7 @@ typedef struct ino_tree_node { - ino_ex_data_t *ex_data; /* phases 6,7 */ - parent_list_t *plist; /* phases 2-5 */ - } ino_un; -+ __uint8_t *ftypes; /* phases 3,6 */ - } ino_tree_node_t; - - #define INOS_PER_IREC (sizeof(__uint64_t) * NBBY) -@@ -359,7 +360,8 @@ ino_tree_node_t *find_uncertain_inode_rec(xfs_agnumber_t agno, - xfs_agino_t ino); - void add_inode_uncertain(xfs_mount_t *mp, - xfs_ino_t ino, int free); --void add_aginode_uncertain(xfs_agnumber_t agno, -+void add_aginode_uncertain(struct xfs_mount *mp, -+ xfs_agnumber_t agno, - xfs_agino_t agino, int free); - void get_uncertain_inode_rec(struct xfs_mount *mp, - xfs_agnumber_t agno, -@@ -476,6 +478,29 @@ static inline void add_inode_reached(struct ino_tree_node *irec, int offset) - } - - /* -+ * get/set inode filetype. Only used if the superblock feature bit is set -+ * which allocates irec->ftypes. 
-+ */ -+static inline void -+set_inode_ftype(struct ino_tree_node *irec, -+ int ino_offset, -+ __uint8_t ftype) -+{ -+ if (irec->ftypes) -+ irec->ftypes[ino_offset] = ftype; -+} -+ -+static inline __uint8_t -+get_inode_ftype( -+ struct ino_tree_node *irec, -+ int ino_offset) -+{ -+ if (!irec->ftypes) -+ return XFS_DIR3_FT_UNKNOWN; -+ return irec->ftypes[ino_offset]; -+} -+ -+/* - * set/get inode number of parent -- works for directory inodes only - */ - void set_inode_parent(ino_tree_node_t *irec, int ino_offset, -diff --git a/repair/incore_ino.c b/repair/incore_ino.c -index 735737a..9502648 100644 ---- a/repair/incore_ino.c -+++ b/repair/incore_ino.c -@@ -211,6 +211,21 @@ __uint32_t get_inode_disk_nlinks(struct ino_tree_node *irec, int ino_offset) - return 0; - } - -+static __uint8_t * -+alloc_ftypes_array( -+ struct xfs_mount *mp) -+{ -+ __uint8_t *ptr; -+ -+ if (!xfs_sb_version_hasftype(&mp->m_sb)) -+ return NULL; -+ -+ ptr = calloc(XFS_INODES_PER_CHUNK, sizeof(*ptr)); -+ if (!ptr) -+ do_error(_("could not allocate ftypes array\n")); -+ return ptr; -+} -+ - /* - * Next is the uncertain inode list -- a sorted (in ascending order) - * list of inode records sorted on the starting inode number. 
There -@@ -226,6 +241,7 @@ __uint32_t get_inode_disk_nlinks(struct ino_tree_node *irec, int ino_offset) - */ - static struct ino_tree_node * - alloc_ino_node( -+ struct xfs_mount *mp, - xfs_agino_t starting_ino) - { - struct ino_tree_node *irec; -@@ -245,6 +261,7 @@ alloc_ino_node( - irec->ino_un.ex_data = NULL; - irec->nlink_size = sizeof(__uint8_t); - irec->disk_nlinks.un8 = alloc_nlink_array(irec->nlink_size); -+ irec->ftypes = alloc_ftypes_array(mp); - return irec; - } - -@@ -285,6 +302,7 @@ free_ino_tree_node( - - } - -+ free(irec->ftypes); - free(irec); - } - -@@ -303,7 +321,11 @@ static ino_tree_node_t **last_rec; - * free is set to 1 if the inode is thought to be free, 0 if used - */ - void --add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) -+add_aginode_uncertain( -+ struct xfs_mount *mp, -+ xfs_agnumber_t agno, -+ xfs_agino_t ino, -+ int free) - { - ino_tree_node_t *ino_rec; - xfs_agino_t s_ino; -@@ -334,7 +356,7 @@ add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) - ino_rec = (ino_tree_node_t *) - avl_findrange(inode_uncertain_tree_ptrs[agno], s_ino); - if (!ino_rec) { -- ino_rec = alloc_ino_node(s_ino); -+ ino_rec = alloc_ino_node(mp, s_ino); - - if (!avl_insert(inode_uncertain_tree_ptrs[agno], - &ino_rec->avl_node)) -@@ -360,7 +382,7 @@ add_aginode_uncertain(xfs_agnumber_t agno, xfs_agino_t ino, int free) - void - add_inode_uncertain(xfs_mount_t *mp, xfs_ino_t ino, int free) - { -- add_aginode_uncertain(XFS_INO_TO_AGNO(mp, ino), -+ add_aginode_uncertain(mp, XFS_INO_TO_AGNO(mp, ino), - XFS_INO_TO_AGINO(mp, ino), free); - } - -@@ -432,7 +454,7 @@ add_inode( - { - struct ino_tree_node *irec; - -- irec = alloc_ino_node(agino); -+ irec = alloc_ino_node(mp, agino); - if (!avl_insert(inode_tree_ptrs[agno], &irec->avl_node)) - do_warn(_("add_inode - duplicate inode range\n")); - return irec; -diff --git a/repair/init.c b/repair/init.c -index c3f380b..d0940aa 100644 ---- a/repair/init.c -+++ b/repair/init.c -@@ -97,8 
+97,17 @@ xfs_init(libxfs_init_t *args) - else - args->isreadonly = LIBXFS_EXCLUSIVELY; - -- if (!libxfs_init(args)) -+ if (!libxfs_init(args)) { -+ /* would -d be an option? */ -+ if (!no_modify && !dangerously) { -+ args->isreadonly = (LIBXFS_ISINACTIVE | -+ LIBXFS_DANGEROUSLY); -+ if (libxfs_init(args)) -+ fprintf(stderr, -+_("Unmount or use the dangerous (-d) option to repair a read-only mounted filesystem\n")); -+ } - do_error(_("couldn't initialize XFS library\n")); -+ } - - ts_create(); - increase_rlimit(); -diff --git a/repair/phase1.c b/repair/phase1.c -index 62de211..ec75ada 100644 ---- a/repair/phase1.c -+++ b/repair/phase1.c -@@ -70,13 +70,14 @@ phase1(xfs_mount_t *mp) - ag_bp = alloc_ag_buf(MAX_SECTSIZE); - sb = (xfs_sb_t *) ag_bp; - -- if (get_sb(sb, 0LL, MAX_SECTSIZE, 0) == XR_EOF) -+ rval = get_sb(sb, 0LL, MAX_SECTSIZE, 0); -+ if (rval == XR_EOF) - do_error(_("error reading primary superblock\n")); - - /* - * is this really an sb, verify internal consistency - */ -- if ((rval = verify_sb(sb, 1)) != XR_OK) { -+ if (rval != XR_OK) { - do_warn(_("bad primary superblock - %s !!!\n"), - err_string(rval)); - if (!find_secondary_sb(sb)) -diff --git a/repair/phase3.c b/repair/phase3.c -index 3e43938..213d368 100644 ---- a/repair/phase3.c -+++ b/repair/phase3.c -@@ -17,6 +17,8 @@ - */ - - #include -+#include "threads.h" -+#include "prefetch.h" - #include "avl.h" - #include "globals.h" - #include "agheader.h" -@@ -24,9 +26,7 @@ - #include "protos.h" - #include "err_protos.h" - #include "dinode.h" --#include "threads.h" - #include "progress.h" --#include "prefetch.h" - - static void - process_agi_unlinked( -@@ -82,41 +82,7 @@ static void - process_ags( - xfs_mount_t *mp) - { -- int i, j; -- xfs_agnumber_t agno; -- work_queue_t *queues; -- prefetch_args_t *pf_args[2]; -- -- queues = malloc(thread_count * sizeof(work_queue_t)); -- -- if (ag_stride) { -- /* -- * create one worker thread for each segment of the volume -- */ -- for (i = 0, agno = 0; i < 
thread_count; i++) { -- create_work_queue(&queues[i], mp, 1); -- pf_args[0] = NULL; -- for (j = 0; j < ag_stride && agno < mp->m_sb.sb_agcount; -- j++, agno++) { -- pf_args[0] = start_inode_prefetch(agno, 0, pf_args[0]); -- queue_work(&queues[i], process_ag_func, agno, pf_args[0]); -- } -- } -- /* -- * wait for workers to complete -- */ -- for (i = 0; i < thread_count; i++) -- destroy_work_queue(&queues[i]); -- } else { -- queues[0].mp = mp; -- pf_args[0] = start_inode_prefetch(0, 0, NULL); -- for (i = 0; i < mp->m_sb.sb_agcount; i++) { -- pf_args[(~i) & 1] = start_inode_prefetch(i + 1, 0, -- pf_args[i & 1]); -- process_ag_func(&queues[0], i, pf_args[i & 1]); -- } -- } -- free(queues); -+ do_inode_prefetch(mp, ag_stride, process_ag_func, false, false); - } - - void -diff --git a/repair/phase4.c b/repair/phase4.c -index a822aaa..189eeb9 100644 ---- a/repair/phase4.c -+++ b/repair/phase4.c -@@ -17,6 +17,8 @@ - */ - - #include -+#include "threads.h" -+#include "prefetch.h" - #include "avl.h" - #include "globals.h" - #include "agheader.h" -@@ -27,9 +29,7 @@ - #include "bmap.h" - #include "versions.h" - #include "dir2.h" --#include "threads.h" - #include "progress.h" --#include "prefetch.h" - - - /* -@@ -150,49 +150,7 @@ static void - process_ags( - xfs_mount_t *mp) - { -- int i, j; -- xfs_agnumber_t agno; -- work_queue_t *queues; -- prefetch_args_t *pf_args[2]; -- -- queues = malloc(thread_count * sizeof(work_queue_t)); -- -- if (!libxfs_bcache_overflowed()) { -- queues[0].mp = mp; -- create_work_queue(&queues[0], mp, libxfs_nproc()); -- for (i = 0; i < mp->m_sb.sb_agcount; i++) -- queue_work(&queues[0], process_ag_func, i, NULL); -- destroy_work_queue(&queues[0]); -- } else { -- if (ag_stride) { -- /* -- * create one worker thread for each segment of the volume -- */ -- for (i = 0, agno = 0; i < thread_count; i++) { -- create_work_queue(&queues[i], mp, 1); -- pf_args[0] = NULL; -- for (j = 0; j < ag_stride && agno < mp->m_sb.sb_agcount; -- j++, agno++) { -- pf_args[0] 
= start_inode_prefetch(agno, 0, pf_args[0]); -- queue_work(&queues[i], process_ag_func, agno, pf_args[0]); -- } -- } -- /* -- * wait for workers to complete -- */ -- for (i = 0; i < thread_count; i++) -- destroy_work_queue(&queues[i]); -- } else { -- queues[0].mp = mp; -- pf_args[0] = start_inode_prefetch(0, 0, NULL); -- for (i = 0; i < mp->m_sb.sb_agcount; i++) { -- pf_args[(~i) & 1] = start_inode_prefetch(i + 1, -- 0, pf_args[i & 1]); -- process_ag_func(&queues[0], i, pf_args[i & 1]); -- } -- } -- } -- free(queues); -+ do_inode_prefetch(mp, ag_stride, process_ag_func, true, false); - } - - -diff --git a/repair/phase6.c b/repair/phase6.c -index d2d4a44..446f3ee 100644 ---- a/repair/phase6.c -+++ b/repair/phase6.c -@@ -17,6 +17,8 @@ - */ - - #include -+#include "threads.h" -+#include "prefetch.h" - #include "avl.h" - #include "globals.h" - #include "agheader.h" -@@ -25,9 +27,7 @@ - #include "protos.h" - #include "err_protos.h" - #include "dinode.h" --#include "prefetch.h" - #include "progress.h" --#include "threads.h" - #include "versions.h" - - static struct cred zerocr; -@@ -43,13 +43,13 @@ static struct xfs_name xfs_name_dot = {(unsigned char *)".", - * entries are updated. 
These must be rebuilt after the initial pass - */ - typedef struct dotdot_update { -- struct dotdot_update *next; -+ struct list_head list; - ino_tree_node_t *irec; - xfs_agnumber_t agno; - int ino_offset; - } dotdot_update_t; - --static dotdot_update_t *dotdot_update_list; -+static LIST_HEAD(dotdot_update_list); - static int dotdot_update; - - static void -@@ -64,12 +64,12 @@ add_dotdot_update( - do_error(_("malloc failed add_dotdot_update (%zu bytes)\n"), - sizeof(dotdot_update_t)); - -- dir->next = dotdot_update_list; -+ INIT_LIST_HEAD(&dir->list); - dir->irec = irec; - dir->agno = agno; - dir->ino_offset = ino_offset; - -- dotdot_update_list = dir; -+ list_add(&dir->list, &dotdot_update_list); - } - - /* -@@ -134,7 +134,8 @@ dir_hash_add( - __uint32_t addr, - xfs_ino_t inum, - int namelen, -- unsigned char *name) -+ unsigned char *name, -+ __uint8_t ftype) - { - xfs_dahash_t hash = 0; - int byaddr; -@@ -148,6 +149,7 @@ dir_hash_add( - - xname.name = name; - xname.len = namelen; -+ xname.type = ftype; - - junk = name[0] == '/'; - byaddr = DIR_HASH_FUNC(hashtab, addr); -@@ -312,6 +314,23 @@ dir_hash_see( - return DIR_HASH_CK_NODATA; - } - -+static void -+dir_hash_update_ftype( -+ dir_hash_tab_t *hashtab, -+ xfs_dir2_dataptr_t addr, -+ __uint8_t ftype) -+{ -+ int i; -+ dir_hash_ent_t *p; -+ -+ i = DIR_HASH_FUNC(hashtab, addr); -+ for (p = hashtab->byaddr[i]; p; p = p->nextbyaddr) { -+ if (p->address != addr) -+ continue; -+ p->name.type = ftype; -+ } -+} -+ - /* - * checks to make sure leafs match a data entry, and that the stale - * count is valid. -@@ -1685,11 +1704,12 @@ longform_dir2_entry_check_data( - if (!orphanage_ino) - orphanage_ino = inum; - } -+ - /* - * check for duplicate names in directory. 
- */ - if (!dir_hash_add(mp, hashtab, addr, inum, dep->namelen, -- dep->name)) { -+ dep->name, xfs_dir3_dirent_get_ftype(mp, dep))) { - nbad++; - if (entry_junked( - _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), -@@ -1763,6 +1783,35 @@ longform_dir2_entry_check_data( - */ - if (no_modify && verify_inum(mp, inum)) - continue; -+ -+ /* validate ftype field if supported */ -+ if (xfs_sb_version_hasftype(&mp->m_sb)) { -+ __uint8_t dir_ftype; -+ __uint8_t ino_ftype; -+ -+ dir_ftype = xfs_dir3_dirent_get_ftype(mp, dep); -+ ino_ftype = get_inode_ftype(irec, ino_offset); -+ -+ if (dir_ftype != ino_ftype) { -+ if (no_modify) { -+ do_warn( -+ _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), -+ dir_ftype, ino_ftype, -+ ip->i_ino, inum); -+ } else { -+ do_warn( -+ _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), -+ dir_ftype, ino_ftype, -+ ip->i_ino, inum); -+ xfs_dir3_dirent_put_ftype(mp, dep, -+ ino_ftype); -+ libxfs_dir2_data_log_entry(tp, bp, dep); -+ dir_hash_update_ftype(hashtab, addr, -+ ino_ftype); -+ } -+ } -+ } -+ - /* - * check easy case first, regular inode, just bump - * the link count and continue -@@ -2189,6 +2238,62 @@ out_fix: - * shortform directory v2 processing routines -- entry verification and - * bad entry deletion (pruning). 
- */ -+static struct xfs_dir2_sf_entry * -+shortform_dir2_junk( -+ struct xfs_mount *mp, -+ struct xfs_dir2_sf_hdr *sfp, -+ struct xfs_dir2_sf_entry *sfep, -+ xfs_ino_t lino, -+ int *max_size, -+ int *index, -+ int *bytes_deleted, -+ int *ino_dirty) -+{ -+ struct xfs_dir2_sf_entry *next_sfep; -+ int next_len; -+ int next_elen; -+ -+ if (lino == orphanage_ino) -+ orphanage_ino = 0; -+ -+ next_elen = xfs_dir3_sf_entsize(mp, sfp, sfep->namelen); -+ next_sfep = (xfs_dir2_sf_entry_t *)((__psint_t)sfep + next_elen); -+ -+ /* -+ * if we are just checking, simply return the pointer to the next entry -+ * here so that the checking loop can continue. -+ */ -+ if (no_modify) { -+ do_warn(_("would junk entry\n")); -+ return next_sfep; -+ } -+ -+ /* -+ * now move all the remaining entries down over the junked entry and -+ * clear the newly unused bytes at the tail of the directory region. -+ */ -+ next_len = *max_size - ((__psint_t)next_sfep - (__psint_t)sfp); -+ *max_size -= next_elen; -+ *bytes_deleted += next_elen; -+ -+ memmove(sfep, next_sfep, next_len); -+ memset((void *)((__psint_t)sfep + next_len), 0, next_elen); -+ sfp->count -= 1; -+ *ino_dirty = 1; -+ -+ /* -+ * WARNING: drop the index i by one so it matches the decremented count -+ * for accurate comparisons in the loop test -+ */ -+ (*index)--; -+ -+ if (verbose) -+ do_warn(_("junking entry\n")); -+ else -+ do_warn("\n"); -+ return sfep; -+} -+ - static void - shortform_dir2_entry_check(xfs_mount_t *mp, - xfs_ino_t ino, -@@ -2201,15 +2306,13 @@ shortform_dir2_entry_check(xfs_mount_t *mp, - xfs_ino_t lino; - xfs_ino_t parent; - struct xfs_dir2_sf_hdr *sfp; -- xfs_dir2_sf_entry_t *sfep, *next_sfep, *tmp_sfep; -- xfs_ifork_t *ifp; -- ino_tree_node_t *irec; -+ struct xfs_dir2_sf_entry *sfep; -+ struct xfs_dir2_sf_entry *next_sfep; -+ struct xfs_ifork *ifp; -+ struct ino_tree_node *irec; - int max_size; - int ino_offset; - int i; -- int junkit; -- int tmp_len; -- int tmp_elen; - int bad_sfnamelen; - int namelen; - int 
bytes_deleted; -@@ -2266,9 +2369,7 @@ shortform_dir2_entry_check(xfs_mount_t *mp, - for (i = 0; i < sfp->count && max_size > - (__psint_t)next_sfep - (__psint_t)sfp; - sfep = next_sfep, i++) { -- junkit = 0; - bad_sfnamelen = 0; -- tmp_sfep = NULL; - - lino = xfs_dir3_sfe_get_ino(mp, sfp, sfep); - -@@ -2340,7 +2441,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, - do_warn( - _("entry \"%s\" in shortform directory %" PRIu64 " references non-existent inode %" PRIu64 "\n"), - fname, ino, lino); -- goto do_junkit; -+ next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, -+ &max_size, &i, &bytes_deleted, -+ ino_dirty); -+ continue; - } - - ino_offset = XFS_INO_TO_AGINO(mp, lino) - irec->ino_startnum; -@@ -2354,7 +2458,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, - do_warn( - _("entry \"%s\" in shortform directory inode %" PRIu64 " points to free inode %" PRIu64 "\n"), - fname, ino, lino); -- goto do_junkit; -+ next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, -+ &max_size, &i, &bytes_deleted, -+ ino_dirty); -+ continue; - } - /* - * check if this inode is lost+found dir in the root -@@ -2367,7 +2474,10 @@ shortform_dir2_entry_check(xfs_mount_t *mp, - do_warn( - _("%s (ino %" PRIu64 ") in root (%" PRIu64 ") is not a directory"), - ORPHANAGE, lino, ino); -- goto do_junkit; -+ next_sfep = shortform_dir2_junk(mp, sfp, sfep, -+ lino, &max_size, &i, -+ &bytes_deleted, ino_dirty); -+ continue; - } - /* - * if this is a dup, it will be picked up below, -@@ -2381,11 +2491,15 @@ shortform_dir2_entry_check(xfs_mount_t *mp, - */ - if (!dir_hash_add(mp, hashtab, (xfs_dir2_dataptr_t) - (sfep - xfs_dir2_sf_firstentry(sfp)), -- lino, sfep->namelen, sfep->name)) { -+ lino, sfep->namelen, sfep->name, -+ xfs_dir3_sfe_get_ftype(mp, sfp, sfep))) { - do_warn( - _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), - fname, lino, ino); -- goto do_junkit; -+ next_sfep = shortform_dir2_junk(mp, sfp, sfep, lino, -+ &max_size, &i, &bytes_deleted, -+ ino_dirty); -+ 
continue; - } - - if (!inode_isadir(irec, ino_offset)) { -@@ -2403,11 +2517,14 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), - * the .. in the child, blow out the entry - */ - if (is_inode_reached(irec, ino_offset)) { -- junkit = 1; - do_warn( - _("entry \"%s\" in directory inode %" PRIu64 - " references already connected inode %" PRIu64 ".\n"), - fname, ino, lino); -+ next_sfep = shortform_dir2_junk(mp, sfp, sfep, -+ lino, &max_size, &i, -+ &bytes_deleted, ino_dirty); -+ continue; - } else if (parent == ino) { - add_inode_reached(irec, ino_offset); - add_inode_ref(current_irec, current_ino_offset); -@@ -2423,76 +2540,60 @@ _("entry \"%s\" (ino %" PRIu64 ") in dir %" PRIu64 " is a duplicate name"), - add_dotdot_update(XFS_INO_TO_AGNO(mp, lino), - irec, ino_offset); - } else { -- junkit = 1; - do_warn( - _("entry \"%s\" in directory inode %" PRIu64 - " not consistent with .. value (%" PRIu64 - ") in inode %" PRIu64 ",\n"), - fname, ino, parent, lino); -+ next_sfep = shortform_dir2_junk(mp, sfp, sfep, -+ lino, &max_size, &i, -+ &bytes_deleted, ino_dirty); -+ continue; - } - } - -- if (junkit) { --do_junkit: -- if (lino == orphanage_ino) -- orphanage_ino = 0; -- if (!no_modify) { -- tmp_elen = xfs_dir3_sf_entsize(mp, sfp, -- sfep->namelen); -- tmp_sfep = (xfs_dir2_sf_entry_t *) -- ((__psint_t) sfep + tmp_elen); -- tmp_len = max_size - ((__psint_t) tmp_sfep -- - (__psint_t) sfp); -- max_size -= tmp_elen; -- bytes_deleted += tmp_elen; -- -- memmove(sfep, tmp_sfep, tmp_len); -- -- sfp->count -= 1; -- memset((void *)((__psint_t)sfep + tmp_len), 0, -- tmp_elen); -+ /* validate ftype field if supported */ -+ if (xfs_sb_version_hasftype(&mp->m_sb)) { -+ __uint8_t dir_ftype; -+ __uint8_t ino_ftype; - -- /* -- * set the tmp value to the current -- * pointer so we'll process the entry -- * we just moved up -- */ -- tmp_sfep = sfep; -- -- /* -- * WARNING: drop the index i by one -- * so it matches the decremented count for -- * accurate 
comparisons in the loop test -- */ -- i--; -- -- *ino_dirty = 1; -+ dir_ftype = xfs_dir3_sfe_get_ftype(mp, sfp, sfep); -+ ino_ftype = get_inode_ftype(irec, ino_offset); - -- if (verbose) -- do_warn(_("junking entry\n")); -- else -- do_warn("\n"); -- } else { -- do_warn(_("would junk entry\n")); -+ if (dir_ftype != ino_ftype) { -+ if (no_modify) { -+ do_warn( -+ _("would fix ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), -+ dir_ftype, ino_ftype, -+ ino, lino); -+ } else { -+ do_warn( -+ _("fixing ftype mismatch (%d/%d) in directory/child inode %" PRIu64 "/%" PRIu64 "\n"), -+ dir_ftype, ino_ftype, -+ ino, lino); -+ xfs_dir3_sfe_put_ftype(mp, sfp, sfep, -+ ino_ftype); -+ dir_hash_update_ftype(hashtab, -+ (xfs_dir2_dataptr_t)(sfep - xfs_dir2_sf_firstentry(sfp)), -+ ino_ftype); -+ *ino_dirty = 1; -+ } - } -- } else if (lino > XFS_DIR2_MAX_SHORT_INUM) -+ } -+ -+ if (lino > XFS_DIR2_MAX_SHORT_INUM) - i8++; - - /* -- * go onto next entry unless we've just junked an -- * entry in which the current entry pointer points -- * to an unprocessed entry. have to take into entries -- * with bad namelen into account in no modify mode since we -- * calculate size based on next_sfep. -+ * go onto next entry - we have to take entries with bad namelen -+ * into account in no modify mode since we calculate size based -+ * on next_sfep. - */ - ASSERT(no_modify || bad_sfnamelen == 0); -- -- next_sfep = (tmp_sfep == NULL) -- ? (xfs_dir2_sf_entry_t *) ((__psint_t) sfep -- + ((!bad_sfnamelen) -- ? xfs_dir3_sf_entsize(mp, sfp, sfep->namelen) -- : xfs_dir3_sf_entsize(mp, sfp, namelen))) -- : tmp_sfep; -+ next_sfep = (struct xfs_dir2_sf_entry *)((__psint_t)sfep + -+ (bad_sfnamelen -+ ? 
xfs_dir3_sf_entsize(mp, sfp, namelen) -+ : xfs_dir3_sf_entsize(mp, sfp, sfep->namelen))); - } - - if (sfp->i8count != i8) { -@@ -2501,6 +2602,8 @@ do_junkit: - ino); - } else { - if (i8 == 0) { -+ struct xfs_dir2_sf_entry *tmp_sfep; -+ - tmp_sfep = next_sfep; - process_sf_dir2_fixi8(mp, sfp, &tmp_sfep); - bytes_deleted += -@@ -2518,8 +2621,7 @@ do_junkit: - /* - * sync up sizes if required - */ -- if (*ino_dirty) { -- ASSERT(bytes_deleted > 0); -+ if (*ino_dirty && bytes_deleted > 0) { - ASSERT(!no_modify); - libxfs_idata_realloc(ip, -bytes_deleted, XFS_DATA_FORK); - ip->i_d.di_size -= bytes_deleted; -@@ -2897,8 +2999,15 @@ traverse_function( - if (irec->ino_isa_dir == 0) - continue; - -- if (pf_args) -+ if (pf_args) { - sem_post(&pf_args->ra_count); -+#ifdef XR_PF_TRACE -+ sem_getvalue(&pf_args->ra_count, &i); -+ pftrace( -+ "processing inode chunk %p in AG %d (sem count = %d)", -+ irec, agno, i); -+#endif -+ } - - for (i = 0; i < XFS_INODES_PER_CHUNK; i++) { - if (inode_isadir(irec, i)) -@@ -2919,9 +3028,10 @@ update_missing_dotdot_entries( - * set dotdot_update flag so processing routines do not count links - */ - dotdot_update = 1; -- while (dotdot_update_list) { -- dir = dotdot_update_list; -- dotdot_update_list = dir->next; -+ while (!list_empty(&dotdot_update_list)) { -+ dir = list_entry(dotdot_update_list.prev, struct dotdot_update, -+ list); -+ list_del(&dir->list); - process_dir_inode(mp, dir->agno, dir->irec, dir->ino_offset); - free(dir); - } -@@ -2929,23 +3039,9 @@ update_missing_dotdot_entries( - - static void - traverse_ags( -- xfs_mount_t *mp) -+ struct xfs_mount *mp) - { -- int i; -- work_queue_t queue; -- prefetch_args_t *pf_args[2]; -- -- /* -- * we always do prefetch for phase 6 as it will fill in the gaps -- * not read during phase 3 prefetch. 
-- */ -- queue.mp = mp; -- pf_args[0] = start_inode_prefetch(0, 1, NULL); -- for (i = 0; i < glob_agcount; i++) { -- pf_args[(~i) & 1] = start_inode_prefetch(i + 1, 1, -- pf_args[i & 1]); -- traverse_function(&queue, i, pf_args[i & 1]); -- } -+ do_inode_prefetch(mp, 0, traverse_function, false, true); - } - - void -diff --git a/repair/prefetch.c b/repair/prefetch.c -index d3491da..e47a48e 100644 ---- a/repair/prefetch.c -+++ b/repair/prefetch.c -@@ -105,11 +105,12 @@ pf_start_io_workers( - static void - pf_queue_io( - prefetch_args_t *args, -- xfs_fsblock_t fsbno, -- int blen, -+ struct xfs_buf_map *map, -+ int nmaps, - int flag) - { -- xfs_buf_t *bp; -+ struct xfs_buf *bp; -+ xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, map[0].bm_bn); - - /* - * Never block on a buffer lock here, given that the actual repair -@@ -117,8 +118,7 @@ pf_queue_io( - * the lock holder is either reading it from disk himself or - * completely overwriting it this behaviour is perfectly fine. - */ -- bp = libxfs_getbuf_flags(mp->m_dev, XFS_FSB_TO_DADDR(mp, fsbno), -- XFS_FSB_TO_BB(mp, blen), LIBXFS_GETBUF_TRYLOCK); -+ bp = libxfs_getbuf_map(mp->m_dev, map, nmaps, LIBXFS_GETBUF_TRYLOCK); - if (!bp) - return; - -@@ -167,6 +167,14 @@ pf_read_bmbt_reclist( - xfs_bmbt_irec_t irec; - xfs_dfilblks_t cp = 0; /* prev count */ - xfs_dfiloff_t op = 0; /* prev offset */ -+#define MAP_ARRAY_SZ 4 -+ struct xfs_buf_map map_array[MAP_ARRAY_SZ]; -+ struct xfs_buf_map *map = map_array; -+ int max_extents = MAP_ARRAY_SZ; -+ int nmaps = 0;; -+ unsigned int len = 0; -+ int ret = 0; -+ - - for (i = 0; i < numrecs; i++) { - libxfs_bmbt_disk_get_all(rp + i, &irec); -@@ -174,11 +182,11 @@ pf_read_bmbt_reclist( - if (((i > 0) && (op + cp > irec.br_startoff)) || - (irec.br_blockcount == 0) || - (irec.br_startoff >= fs_max_file_offset)) -- return 0; -+ goto out_free; - - if (!verify_dfsbno(mp, irec.br_startblock) || !verify_dfsbno(mp, - irec.br_startblock + irec.br_blockcount - 1)) -- return 0; -+ goto out_free; - - if 
(!args->dirs_only && ((irec.br_startoff + - irec.br_blockcount) >= mp->m_dirfreeblk)) -@@ -188,18 +196,59 @@ pf_read_bmbt_reclist( - cp = irec.br_blockcount; - - while (irec.br_blockcount) { -- unsigned int len; -+ unsigned int bm_len; - - pftrace("queuing dir extent in AG %d", args->agno); - -- len = (irec.br_blockcount > mp->m_dirblkfsbs) ? -- mp->m_dirblkfsbs : irec.br_blockcount; -- pf_queue_io(args, irec.br_startblock, len, B_DIR_META); -- irec.br_blockcount -= len; -- irec.br_startblock += len; -+ if (len + irec.br_blockcount >= mp->m_dirblkfsbs) -+ bm_len = mp->m_dirblkfsbs - len; -+ else -+ bm_len = irec.br_blockcount; -+ len += bm_len; -+ -+ map[nmaps].bm_bn = XFS_FSB_TO_DADDR(mp, -+ irec.br_startblock); -+ map[nmaps].bm_len = XFS_FSB_TO_BB(mp, bm_len); -+ nmaps++; -+ -+ if (len == mp->m_dirblkfsbs) { -+ pf_queue_io(args, map, nmaps, B_DIR_META); -+ len = 0; -+ nmaps = 0; -+ } -+ -+ irec.br_blockcount -= bm_len; -+ irec.br_startblock += bm_len; -+ -+ /* -+ * Handle very fragmented dir2 blocks with dynamically -+ * allocated buffer maps. -+ */ -+ if (nmaps >= max_extents) { -+ struct xfs_buf_map *old_map = NULL; -+ -+ if (map == map_array) { -+ old_map = map; -+ map = NULL; -+ } -+ max_extents *= 2; -+ map = realloc(map, max_extents * sizeof(*map)); -+ if (map == NULL) { -+ do_error( -+ _("couldn't malloc dir2 buffer list\n")); -+ exit(1); -+ } -+ if (old_map) -+ memcpy(map, old_map, sizeof(map_array)); -+ } -+ - } - } -- return 1; -+ ret = 1; -+out_free: -+ if (map != map_array) -+ free(map); -+ return ret; - } - - /* -@@ -249,7 +298,8 @@ pf_scanfunc_bmap( - /* - * do some validation on the block contents - */ -- if ((be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC) || -+ if ((block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) && -+ block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC)) || - (be16_to_cpu(block->bb_level) != level)) - return 0; - -@@ -395,9 +445,28 @@ pf_read_inode_dirs( - } - - /* -- * pf_batch_read must be called with the lock locked. 
-+ * Discontiguous buffers require multiple IOs to fill, so we can't use any -+ * linearising, hole filling algorithms on them to avoid seeks. Just remove them -+ * for the prefetch queue and read them straight into the cache and release -+ * them. - */ -+static void -+pf_read_discontig( -+ struct prefetch_args *args, -+ struct xfs_buf *bp) -+{ -+ if (!btree_delete(args->io_queue, XFS_DADDR_TO_FSB(mp, bp->b_bn))) -+ do_error(_("prefetch corruption\n")); -+ -+ pthread_mutex_unlock(&args->lock); -+ libxfs_readbufr_map(mp->m_ddev_targp, bp, 0); -+ libxfs_putbuf(bp); -+ pthread_mutex_lock(&args->lock); -+} - -+/* -+ * pf_batch_read must be called with the lock locked. -+ */ - static void - pf_batch_read( - prefetch_args_t *args, -@@ -426,8 +495,15 @@ pf_batch_read( - max_fsbno = fsbno + pf_max_fsbs; - } - while (bplist[num] && num < MAX_BUFS && fsbno < max_fsbno) { -- if (which != PF_META_ONLY || -- !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num]))) -+ /* -+ * Handle discontiguous buffers outside the seek -+ * optimised IO loop below. -+ */ -+ if ((bplist[num]->b_flags & LIBXFS_B_DISCONTIG)) { -+ pf_read_discontig(args, bplist[num]); -+ bplist[num] = NULL; -+ } else if (which != PF_META_ONLY || -+ !B_IS_INODE(XFS_BUF_PRIORITY(bplist[num]))) - num++; - if (num == MAX_BUFS) - break; -@@ -648,7 +724,7 @@ pf_queuing_worker( - irec, args->agno, i); - #endif - err = sem_trywait(&args->ra_count); -- if (err == EAGAIN) { -+ if (err < 0 && errno == EAGAIN) { - /* - * Kick the queue once we have reached the limit; - * without this the threads processing the inodes -@@ -664,10 +740,13 @@ pf_queuing_worker( - bno = XFS_AGINO_TO_AGBNO(mp, cur_irec->ino_startnum); - - do { -- pf_queue_io(args, XFS_AGB_TO_FSB(mp, args->agno, bno), -- blks_per_cluster, -- (cur_irec->ino_isa_dir != 0) ? 
-- B_DIR_INODE : B_INODE); -+ struct xfs_buf_map map; -+ -+ map.bm_bn = XFS_AGB_TO_DADDR(mp, args->agno, bno); -+ map.bm_len = XFS_FSB_TO_BB(mp, blks_per_cluster); -+ pf_queue_io(args, &map, 1, -+ (cur_irec->ino_isa_dir != 0) ? B_DIR_INODE -+ : B_INODE); - bno += blks_per_cluster; - num_inos += inodes_per_cluster; - } while (num_inos < XFS_IALLOC_INODES(mp)); -@@ -787,6 +866,140 @@ start_inode_prefetch( - return args; - } - -+/* -+ * prefetch_ag_range runs a prefetch-and-process loop across a range of AGs. It -+ * begins with @start+ag, and finishes with @end_ag - 1 (i.e. does not prefetch -+ * or process @end_ag). The function starts prefetch on the first AG, then loops -+ * starting prefetch on the next AG and then blocks processing the current AG as -+ * the prefetch queue brings inodes into the processing queue. -+ * -+ * There is only one prefetch taking place at a time, so the prefetch on the -+ * next AG only starts once the current AG has been completely prefetched. Hence -+ * the prefetch of the next AG will start some time before the processing of the -+ * current AG finishes, ensuring that when we iterate an start processing the -+ * next AG there is already a significant queue of inodes to process. -+ * -+ * Prefetch is done this way to prevent it from running too far ahead of the -+ * processing. Allowing it to do so can cause cache thrashing, where new -+ * prefetch causes previously prefetched buffers to be reclaimed before the -+ * processing thread uses them. This results in reading all the inodes and -+ * metadata twice per phase and it greatly slows down the processing. Hence we -+ * have to carefully control how far ahead we prefetch... 
-+ */ -+static void -+prefetch_ag_range( -+ struct work_queue *work, -+ xfs_agnumber_t start_ag, -+ xfs_agnumber_t end_ag, -+ bool dirs_only, -+ void (*func)(struct work_queue *, -+ xfs_agnumber_t, void *)) -+{ -+ int i; -+ struct prefetch_args *pf_args[2]; -+ -+ pf_args[start_ag & 1] = start_inode_prefetch(start_ag, dirs_only, NULL); -+ for (i = start_ag; i < end_ag; i++) { -+ /* Don't prefetch end_ag */ -+ if (i + 1 < end_ag) -+ pf_args[(~i) & 1] = start_inode_prefetch(i + 1, -+ dirs_only, pf_args[i & 1]); -+ func(work, i, pf_args[i & 1]); -+ } -+} -+ -+struct pf_work_args { -+ xfs_agnumber_t start_ag; -+ xfs_agnumber_t end_ag; -+ bool dirs_only; -+ void (*func)(struct work_queue *, xfs_agnumber_t, void *); -+}; -+ -+static void -+prefetch_ag_range_work( -+ struct work_queue *work, -+ xfs_agnumber_t unused, -+ void *args) -+{ -+ struct pf_work_args *wargs = args; -+ -+ prefetch_ag_range(work, wargs->start_ag, wargs->end_ag, -+ wargs->dirs_only, wargs->func); -+ free(args); -+} -+ -+/* -+ * Do inode prefetch in the most optimal way for the context under which repair -+ * has been run. -+ */ -+void -+do_inode_prefetch( -+ struct xfs_mount *mp, -+ int stride, -+ void (*func)(struct work_queue *, -+ xfs_agnumber_t, void *), -+ bool check_cache, -+ bool dirs_only) -+{ -+ int i; -+ struct work_queue queue; -+ struct work_queue *queues; -+ -+ /* -+ * If the previous phases of repair have not overflowed the buffer -+ * cache, then we don't need to re-read any of the metadata in the -+ * filesystem - it's all in the cache. In that case, run a thread per -+ * CPU to maximise parallelism of the queue to be processed. 
-+ */ -+ if (check_cache && !libxfs_bcache_overflowed()) { -+ queue.mp = mp; -+ create_work_queue(&queue, mp, libxfs_nproc()); -+ for (i = 0; i < mp->m_sb.sb_agcount; i++) -+ queue_work(&queue, func, i, NULL); -+ destroy_work_queue(&queue); -+ return; -+ } -+ -+ /* -+ * single threaded behaviour - single prefetch thread, processed -+ * directly after each AG is queued. -+ */ -+ if (!stride) { -+ queue.mp = mp; -+ prefetch_ag_range(&queue, 0, mp->m_sb.sb_agcount, -+ dirs_only, func); -+ return; -+ } -+ -+ /* -+ * create one worker thread for each segment of the volume -+ */ -+ queues = malloc(thread_count * sizeof(work_queue_t)); -+ for (i = 0; i < thread_count; i++) { -+ struct pf_work_args *wargs; -+ -+ wargs = malloc(sizeof(struct pf_work_args)); -+ wargs->start_ag = i * stride; -+ wargs->end_ag = min((i + 1) * stride, -+ mp->m_sb.sb_agcount); -+ wargs->dirs_only = dirs_only; -+ wargs->func = func; -+ -+ create_work_queue(&queues[i], mp, 1); -+ queue_work(&queues[i], prefetch_ag_range_work, 0, wargs); -+ -+ if (wargs->end_ag >= mp->m_sb.sb_agcount) -+ break; -+ } -+ -+ /* -+ * wait for workers to complete -+ */ -+ while (i--) -+ destroy_work_queue(&queues[i]); -+ free(queues); -+} -+ - void - wait_for_inode_prefetch( - prefetch_args_t *args) -diff --git a/repair/prefetch.h b/repair/prefetch.h -index 44a406c..b837752 100644 ---- a/repair/prefetch.h -+++ b/repair/prefetch.h -@@ -4,6 +4,7 @@ - #include - #include "incore.h" - -+struct work_queue; - - extern int do_prefetch; - -@@ -41,6 +42,15 @@ start_inode_prefetch( - prefetch_args_t *prev_args); - - void -+do_inode_prefetch( -+ struct xfs_mount *mp, -+ int stride, -+ void (*func)(struct work_queue *, -+ xfs_agnumber_t, void *), -+ bool check_cache, -+ bool dirs_only); -+ -+void - wait_for_inode_prefetch( - prefetch_args_t *args); - -diff --git a/repair/protos.h b/repair/protos.h -index 601f2a9..ff42fa7 100644 ---- a/repair/protos.h -+++ b/repair/protos.h -@@ -18,7 +18,8 @@ - - void xfs_init(libxfs_init_t *args); - 
--int verify_sb(xfs_sb_t *sb, -+int verify_sb(char *sb_buf, -+ xfs_sb_t *sb, - int is_primary_sb); - int verify_set_primary_sb(xfs_sb_t *root_sb, - int sb_index, -diff --git a/repair/sb.c b/repair/sb.c -index c54d89b..b111aca 100644 ---- a/repair/sb.c -+++ b/repair/sb.c -@@ -139,7 +139,7 @@ find_secondary_sb(xfs_sb_t *rsb) - c_bufsb = (char *)sb + i; - libxfs_sb_from_disk(&bufsb, (xfs_dsb_t *)c_bufsb); - -- if (verify_sb(&bufsb, 0) != XR_OK) -+ if (verify_sb(c_bufsb, &bufsb, 0) != XR_OK) - continue; - - do_warn(_("found candidate secondary superblock...\n")); -@@ -245,7 +245,7 @@ sb_validate_ino_align(struct xfs_sb *sb) - */ - - int --verify_sb(xfs_sb_t *sb, int is_primary_sb) -+verify_sb(char *sb_buf, xfs_sb_t *sb, int is_primary_sb) - { - __uint32_t bsize; - int i; -@@ -263,8 +263,34 @@ verify_sb(xfs_sb_t *sb, int is_primary_sb) - if (is_primary_sb && sb->sb_inprogress == 1) - return(XR_BAD_INPROGRESS); - -- /* check to make sure blocksize is legal 2^N, 9 <= N <= 16 */ -+ /* -+ * before going *any further*, validate the sector size and if the -+ * version says we should have CRCs enabled, validate that. 
-+ */ -+ -+ /* check to make sure sectorsize is legal 2^N, 9 <= N <= 15 */ -+ if (sb->sb_sectsize == 0) -+ return(XR_BAD_SECT_SIZE_DATA); -+ -+ bsize = 1; -+ for (i = 0; bsize < sb->sb_sectsize && -+ i < sizeof(sb->sb_sectsize) * NBBY; i++) { -+ bsize <<= 1; -+ } -+ -+ if (i < XFS_MIN_SECTORSIZE_LOG || i > XFS_MAX_SECTORSIZE_LOG) -+ return(XR_BAD_SECT_SIZE_DATA); -+ -+ /* check sb sectorsize field against sb sectlog field */ -+ if (i != sb->sb_sectlog) -+ return(XR_BAD_SECT_SIZE_DATA); -+ -+ /* sector size in range - CRC check time */ -+ if (xfs_sb_version_hascrc(sb) && -+ !xfs_verify_cksum(sb_buf, sb->sb_sectsize, XFS_SB_CRC_OFF)) -+ return XR_BAD_CRC; - -+ /* check to make sure blocksize is legal 2^N, 9 <= N <= 16 */ - if (sb->sb_blocksize == 0) - return(XR_BAD_BLOCKSIZE); - -@@ -300,26 +326,6 @@ verify_sb(xfs_sb_t *sb, int is_primary_sb) - sb->sb_inopblock != howmany(sb->sb_blocksize,sb->sb_inodesize)) - return(XR_BAD_INO_SIZE_DATA); - -- /* check to make sure sectorsize is legal 2^N, 9 <= N <= 15 */ -- -- if (sb->sb_sectsize == 0) -- return(XR_BAD_SECT_SIZE_DATA); -- -- bsize = 1; -- -- for (i = 0; bsize < sb->sb_sectsize && -- i < sizeof(sb->sb_sectsize) * NBBY; i++) { -- bsize <<= 1; -- } -- -- if (i < XFS_MIN_SECTORSIZE_LOG || i > XFS_MAX_SECTORSIZE_LOG) -- return(XR_BAD_SECT_SIZE_DATA); -- -- /* check sb sectorsize field against sb sectlog field */ -- -- if (i != sb->sb_sectlog) -- return(XR_BAD_SECT_SIZE_DATA); -- - if (xfs_sb_version_hassector(sb)) { - - /* check to make sure log sector is legal 2^N, 9 <= N <= 15 */ -@@ -482,9 +488,11 @@ write_primary_sb(xfs_sb_t *sbp, int size) - do_error(_("couldn't seek to offset 0 in filesystem\n")); - } - -- - libxfs_sb_to_disk(buf, sbp, XFS_SB_ALL_BITS); - -+ if (xfs_sb_version_hascrc(sbp)) -+ xfs_update_cksum((char *)buf, size, XFS_SB_CRC_OFF); -+ - if (write(x.dfd, buf, size) != size) { - free(buf); - do_error(_("primary superblock write failed!\n")); -@@ -494,7 +502,7 @@ write_primary_sb(xfs_sb_t *sbp, int size) 
- } - - /* -- * get a possible superblock -- don't check for internal consistency -+ * get a possible superblock -- checks for internal consistency - */ - int - get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno) -@@ -529,9 +537,10 @@ get_sb(xfs_sb_t *sbp, xfs_off_t off, int size, xfs_agnumber_t agno) - do_error("%s\n", strerror(error)); - } - libxfs_sb_from_disk(sbp, buf); -- free(buf); - -- return (verify_sb(sbp, 0)); -+ rval = verify_sb((char *)buf, sbp, agno == 0); -+ free(buf); -+ return rval; - } - - /* returns element on list with highest reference count */ -@@ -745,13 +754,11 @@ verify_set_primary_sb(xfs_sb_t *rsb, - off = (xfs_off_t)agno * rsb->sb_agblocks << rsb->sb_blocklog; - - checked[agno] = 1; -- -- if (get_sb(sb, off, size, agno) == XR_EOF) { -- retval = XR_EOF; -+ retval = get_sb(sb, off, size, agno); -+ if (retval == XR_EOF) - goto out_free_list; -- } - -- if (verify_sb(sb, 0) == XR_OK) { -+ if (retval == XR_OK) { - /* - * save away geometry info. - * don't bother checking the sb -diff --git a/repair/scan.c b/repair/scan.c -index 49ed194..1744c32 100644 ---- a/repair/scan.c -+++ b/repair/scan.c -@@ -171,17 +171,12 @@ scan_bmapbt( - xfs_bmbt_rec_t *rp; - xfs_dfiloff_t first_key; - xfs_dfiloff_t last_key; -- char *forkname; -+ char *forkname = get_forkname(whichfork); - int numrecs; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - int state; - -- if (whichfork == XFS_DATA_FORK) -- forkname = _("data"); -- else -- forkname = _("attr"); -- - /* - * unlike the ag freeblock btrees, if anything looks wrong - * in an inode bmap tree, just bail. 
it's possible that -@@ -273,7 +268,7 @@ _("bad back (left) sibling pointer (saw %llu should be NULL (0))\n" - agno = XFS_FSB_TO_AGNO(mp, bno); - agbno = XFS_FSB_TO_AGBNO(mp, bno); - -- pthread_mutex_lock(&ag_locks[agno]); -+ pthread_mutex_lock(&ag_locks[agno].lock); - state = get_bmap(agno, agbno); - switch (state) { - case XR_E_UNKNOWN: -@@ -319,7 +314,7 @@ _("bad state %d, inode %" PRIu64 " bmap block 0x%" PRIx64 "\n"), - state, ino, bno); - break; - } -- pthread_mutex_unlock(&ag_locks[agno]); -+ pthread_mutex_unlock(&ag_locks[agno].lock); - } else { - /* - * attribute fork for realtime files is in the regular -@@ -866,9 +861,9 @@ _("inode rec for ino %" PRIu64 " (%d/%d) overlaps existing rec (start %d/%d)\n") - for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { - if (XFS_INOBT_IS_FREE_DISK(rp, j)) { - nfree++; -- add_aginode_uncertain(agno, ino + j, 1); -+ add_aginode_uncertain(mp, agno, ino + j, 1); - } else { -- add_aginode_uncertain(agno, ino + j, 0); -+ add_aginode_uncertain(mp, agno, ino + j, 0); - } - } - } -@@ -1229,7 +1224,6 @@ scan_ag( - do_error(_("can't get root superblock for ag %d\n"), agno); - return; - } -- - sb = (xfs_sb_t *)calloc(BBSIZE, 1); - if (!sb) { - do_error(_("can't allocate memory for superblock\n")); -diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c -index 7beffcb..08b25f0 100644 ---- a/repair/xfs_repair.c -+++ b/repair/xfs_repair.c -@@ -29,6 +29,7 @@ - #include "prefetch.h" - #include "threads.h" - #include "progress.h" -+#include "dinode.h" - - #define rounddown(x, y) (((x)/(y))*(y)) - -@@ -136,6 +137,8 @@ err_string(int err_code) - _("bad stripe width in superblock"); - err_message[XR_BAD_SVN] = - _("bad shared version number in superblock"); -+ err_message[XR_BAD_CRC] = -+ _("bad CRC in superblock"); - done = 1; - } - -@@ -528,11 +531,14 @@ main(int argc, char **argv) - xfs_buf_t *sbp; - xfs_mount_t xfs_m; - char *msgbuf; -+ struct xfs_sb psb; -+ int rval; - - progname = basename(argv[0]); - setlocale(LC_ALL, ""); - 
bindtextdomain(PACKAGE, LOCALEDIR); - textdomain(PACKAGE); -+ dinode_bmbt_translation_init(); - - temp_mp = &xfs_m; - setbuf(stdout, NULL); -@@ -556,13 +562,12 @@ main(int argc, char **argv) - exit(1); - } - -- /* prepare the mount structure */ -- memset(&xfs_m, 0, sizeof(xfs_mount_t)); -- libxfs_buftarg_init(&xfs_m, x.ddev, x.logdev, x.rtdev); -- sbp = libxfs_readbuf(xfs_m.m_ddev_targp, XFS_SB_DADDR, -- 1 << (XFS_MAX_SECTORSIZE_LOG - BBSHIFT), 0, -- &xfs_sb_buf_ops); -- libxfs_sb_from_disk(&xfs_m.m_sb, XFS_BUF_TO_SBP(sbp)); -+ rval = get_sb(&psb, 0, XFS_MAX_SECTORSIZE, 0); -+ if (rval != XR_OK) { -+ do_warn(_("Primary superblock bad after phase 1!\n" -+ "Exiting now.\n")); -+ exit(1); -+ } - - /* - * if the sector size of the filesystem we are trying to repair is -@@ -581,7 +586,7 @@ main(int argc, char **argv) - geom.sectsize = BBSIZE; - } - -- if (xfs_m.m_sb.sb_sectsize < geom.sectsize) { -+ if (psb.sb_sectsize < geom.sectsize) { - long old_flags; - - old_flags = fcntl(fd, F_GETFL, 0); -@@ -593,7 +598,10 @@ main(int argc, char **argv) - } - } - } -- mp = libxfs_mount(&xfs_m, &xfs_m.m_sb, x.ddev, x.logdev, x.rtdev, 0); -+ -+ /* prepare the mount structure */ -+ memset(&xfs_m, 0, sizeof(xfs_mount_t)); -+ mp = libxfs_mount(&xfs_m, &psb, x.ddev, x.logdev, x.rtdev, 0); - - if (!mp) { - fprintf(stderr, -@@ -601,8 +609,6 @@ main(int argc, char **argv) - progname); - exit(1); - } -- libxfs_putbuf(sbp); -- libxfs_purgebuf(sbp); - - /* - * set XFS-independent status vars from the mount/sb structure -@@ -627,13 +633,32 @@ main(int argc, char **argv) - * to target these for an increase in thread count. Hence a stride value - * of 15 is chosen to ensure we get at least 2 AGs being scanned at once - * on such filesystems. -+ * -+ * Limit the maximum thread count based on the available CPU power that -+ * is available. If we use too many threads, we might run out of memory -+ * and CPU power before we run out of IO concurrency. 
We limit to 8 -+ * threads/CPU as this is enough threads to saturate a CPU on fast -+ * devices, yet few enough that it will saturate but won't overload slow -+ * devices. - */ - if (!ag_stride && glob_agcount >= 16 && do_prefetch) - ag_stride = 15; - - if (ag_stride) { -+ int max_threads = platform_nproc() * 8; -+ - thread_count = (glob_agcount + ag_stride - 1) / ag_stride; -- thread_init(); -+ while (thread_count > max_threads) { -+ ag_stride *= 2; -+ thread_count = (glob_agcount + ag_stride - 1) / -+ ag_stride; -+ } -+ if (thread_count > 0) -+ thread_init(); -+ else { -+ thread_count = 1; -+ ag_stride = 0; -+ } - } - - if (ag_stride && report_interval) { -@@ -895,6 +920,11 @@ _("Note - stripe unit (%d) and width (%d) fields have been reset.\n" - if (verbose) - summary_report(); - do_log(_("done\n")); -+ -+ if (dangerously && !no_modify) -+ do_warn( -+_("Repair of readonly mount complete. Immediate reboot encouraged.\n")); -+ - pftrace_done(); - - return (0); diff --git a/SPECS/xfsprogs.spec b/SPECS/xfsprogs.spec index 6425f57..c3d490b 100644 --- a/SPECS/xfsprogs.spec +++ b/SPECS/xfsprogs.spec @@ -1,7 +1,7 @@ Summary: Utilities for managing the XFS filesystem Name: xfsprogs -Version: 3.2.0 -Release: 0.10.alpha2%{?dist} +Version: 3.2.1 +Release: 6%{?dist} # Licensing based on generic "GNU GENERAL PUBLIC LICENSE" # in source, with no mention of version. 
# doc/COPYING file specifies what is GPL and what is LGPL @@ -9,7 +9,7 @@ Release: 0.10.alpha2%{?dist} License: GPL+ and LGPLv2+ Group: System Environment/Base URL: http://oss.sgi.com/projects/xfs/ -Source0: ftp://oss.sgi.com/projects/xfs/cmd_tars/%{name}-%{version}-alpha2.tar.gz +Source0: ftp://oss.sgi.com/projects/xfs/cmd_tars/%{name}-%{version}.tar.gz Source1: xfsprogs-wrapper.h BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildRequires: libtool, gettext, libuuid-devel @@ -18,12 +18,13 @@ Provides: xfs-cmds Obsoletes: xfs-cmds <= %{version} Conflicts: xfsdump < 3.0.1 -# Bring xfsprogs up to fd799f7 in the upstream repo -Patch0: xfsprogs-diff-since-alpha2.patch -# 2 small patches on list not yet committed -Patch1: xfsprogs-3.2.0-repair-zero-sb.patch -Patch2: xfsprogs-3.2.0-xfs_db-quiet.patch -Patch3: xfsprogs-3.2.0-prefetch-fix.patch +Patch0: xfsprogs-3.2.1-quota-fix-NULL-pointer-dereference-in-report_f.patch +Patch1: xfsprogs-3.2.1-libxcmd-make-all-comparisons-using-realpathd-paths.patch +Patch2: xfsprogs-3.2.1-add-supported-file-attributes-to-xfs.5-manpage.patch +Patch3: xfsprogs-3.2.1-copy-stripe-geometry.patch +Patch4: xfsprogs-3.2.1-xfs_quota-manpage.patch +Patch5: xfsprogs-3.2.1-xfs_copy-simplify-first_agbno-calculation.patch +Patch6: xfsprogs-3.2.1-xfs_repair-fix-max-block-offset-test.patch %description A set of commands to use the XFS filesystem, including mkfs.xfs. @@ -66,12 +67,14 @@ You should install xfsprogs-qa-devel only if you are interested in building or running the xfstests QA suite. 
%prep -%setup -q -n xfsprogs-3.2.0-alpha2 - +%setup -q %patch0 -p1 %patch1 -p1 %patch2 -p1 %patch3 -p1 +%patch4 -p1 +%patch5 -p1 +%patch6 -p1 %build export tagname=CC @@ -162,6 +165,7 @@ rm -rf $RPM_BUILD_ROOT %{_includedir}/xfs/xfs_alloc_btree.h %{_includedir}/xfs/xfs_arch.h %{_includedir}/xfs/xfs_attr_leaf.h +%{_includedir}/xfs/xfs_attr_remote.h %{_includedir}/xfs/xfs_attr_sf.h %{_includedir}/xfs/xfs_bit.h %{_includedir}/xfs/xfs_bmap.h @@ -190,6 +194,27 @@ rm -rf $RPM_BUILD_ROOT %{_includedir}/xfs/xfs_trans_space.h %changelog +* Fri Dec 19 2014 Eric Sandeen 3.2.1-6 +- xfs_repair: fix maximum block offset test (#1173146) +- xfs_copy: fix assert failure on 4k sector devices (#1162414) +- xfs_quota: man page updates (#175133, #1175627) + +* Fri Oct 24 2014 Eric Sandeen 3.2.1-5 +- xfs_repair: copy stripe geometry from backup supers if needed (#1150857) + +* Wed Sep 17 2014 Eric Sandeen 3.2.1-3 +- Add supported file attributes to xfs.5 manpage (#1142555) + +* Mon Sep 15 2014 Eric Sandeen 3.2.1-2 +- xfs_quota: fix segfault when reporting on nonexistant path (#1077826) +- xfs_quota: fix reporting on symlinked paths (#1077841) + +* Tue Jul 15 2014 Eric Sandeen 3.2.1-1 +- New upstream release (#1119940) +- xfs_copy: fix data corruption of target (#1105170) +- mkfs.xfs: handle mkfs of file on 4k block device (#1101236) +- xfs_copy: don't exit with error code on success (#1100376) + * Tue Mar 11 2014 Eric Sandeen 3.2.0-0.10.alpha2 - Fix bug in xfs_repair's inode prefetch (#1083820)