From 9f2a32a8fd08bb1b48f29c88bfa398fa4eb5f2a4 Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Wed, 6 Jun 2018 11:59:16 +0200 Subject: [PATCH 159/173] fallocate: backport v2.32-164-g641af90dc * add --dig-holes * add --collapse-range * add --insert-range * add --zero-range For backward compatibility with previous RHEL7 versions we keep O_CREAT for open(). The current upstream uses O_CREAT only when necessary. Addresses: http://bugzilla.redhat.com/show_bug.cgi?id=1528567 Signed-off-by: Karel Zak --- sys-utils/fallocate.1 | 195 ++++++++++++++++++++++------ sys-utils/fallocate.c | 349 +++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 428 insertions(+), 116 deletions(-) diff --git a/sys-utils/fallocate.1 b/sys-utils/fallocate.1 index 376353013..d4821dcd1 100644 --- a/sys-utils/fallocate.1 +++ b/sys-utils/fallocate.1 @@ -1,72 +1,185 @@ -.\" -*- nroff -*- -.TH FALLOCATE 1 "September 2011" "util-linux" "User Commands" +.TH FALLOCATE 1 "April 2014" "util-linux" "User Commands" .SH NAME -fallocate \- preallocate space to a file +fallocate \- preallocate or deallocate space to a file .SH SYNOPSIS .B fallocate -.RB [ \-n ] -.RB [ \-p ] +.RB [ \-c | \-p | \-z ] .RB [ \-o .IR offset ] .B \-l -.IR length +.I length +.RB [ \-n ] +.I filename +.PP +.B fallocate \-d +.RB [ \-o +.IR offset ] +.RB [ \-l +.IR length ] .I filename .PP .B fallocate \-x .RB [ \-o .IR offset ] -.RB \-l -.IR length +.B \-l +.I length .I filename .SH DESCRIPTION .B fallocate -is used to preallocate blocks to a file. For filesystems which support the -fallocate system call, this is done quickly by allocating blocks and marking -them as uninitialized, requiring no IO to the data blocks. This is much faster -than creating a file by filling it with zeros. -.PP -As of the Linux Kernel v2.6.31, the fallocate system call is supported by the -btrfs, ext4, ocfs2, and xfs filesystems. +is used to manipulate the allocated disk space for a file, +either to deallocate or preallocate it. 
+For filesystems which support the fallocate system call, +preallocation is done quickly by allocating blocks and marking them as +uninitialized, requiring no IO to the data blocks. +This is much faster than creating a file by filling it with zeroes. .PP The exit code returned by .B fallocate is 0 on success and 1 on failure. -.PP .SH OPTIONS -The \fIlength\fR and \fIoffset\fR arguments may be followed by the multiplicative -suffixes KiB=1024, MiB=1024*1024, and so on for GiB, TiB, PiB, EiB, ZiB and YiB -(the "iB" is optional, e.g. "K" has the same meaning as "KiB") or the suffixes -KB=1000, MB=1000*1000, and so on for GB, TB, PB, EB, ZB and YB. -.IP "\fB\-n, \-\-keep-size\fP" +The +.I length +and +.I offset +arguments may be followed by the multiplicative suffixes KiB (=1024), +MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB, and YiB (the "iB" is +optional, e.g., "K" has the same meaning as "KiB") or the suffixes +KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB, and YB. +.PP +The options +.BR \-\-collapse\-range ", " \-\-dig\-holes ", " \-\-punch\-hole , +and +.B \-\-zero\-range +are mutually exclusive. +.TP +.BR \-c ", " \-\-collapse\-range +Removes a byte range from a file, without leaving a hole. +The byte range to be collapsed starts at +.I offset +and continues for +.I length +bytes. +At the completion of the operation, +the contents of the file starting at the location +.IR offset + length +will be appended at the location +.IR offset , +and the file will be +.I length +bytes smaller. +The option +.B \-\-keep\-size +may not be specified for the collapse-range operation. +.sp +Available since Linux 3.15 for ext4 (only for extent-based files) and XFS. +.TP +.BR \-d ", " \-\-dig\-holes +Detect and dig holes. +This makes the file sparse in-place, without using extra disk space. +The minimum size of the hole depends on filesystem I/O block size +(usually 4096 bytes). +Also, when using this option, +.B \-\-keep\-size +is implied. 
If no range is specified by +.B \-\-offset +and +.BR \-\-length , +then the entire file is analyzed for holes. +.sp +You can think of this option as doing a +.RB """" "cp \-\-sparse" """" +and then renaming the destination file to the original, +without the need for extra disk space. +.sp +See \fB\-\-punch\-hole\fP for a list of supported filesystems. +.TP +.BR \-i ", " \-\-insert\-range +Insert a hole of +.I length +bytes from +.IR offset , +shifting existing data. +.TP +.BR \-l ", " "\-\-length " \fIlength +Specifies the length of the range, in bytes. +.TP +.BR \-n ", " \-\-keep\-size Do not modify the apparent length of the file. This may effectively allocate blocks past EOF, which can be removed with a truncate. -.IP "\fB\-p, \-\-punch-hole\fP" -Punch holes in the file, the range should not exceed the length of the file. -.IP "\fB\-o, \-\-offset\fP \fIoffset\fP -Specifies the beginning offset of the allocation, in bytes. -.IP "\fB\-l, \-\-length\fP \fIlength\fP -Specifies the length of the allocation, in bytes. -.IP "\fB\-x , \-\-posix\fP -Enable POSIX operation mode. In that mode allocation operation always completes, -but it may take longer time when fast allocation is not supported by the underlying filesystem. -.IP "\fB\-h, \-\-help\fP" -Print help and exit. -.IP "\fB-V, \-\-version" -Print version and exit. +.TP +.BR \-o ", " "\-\-offset " \fIoffset +Specifies the beginning offset of the range, in bytes. +.TP +.BR \-p ", " \-\-punch\-hole +Deallocates space (i.e., creates a hole) in the byte range starting at +.I offset +and continuing for +.I length +bytes. +Within the specified range, partial filesystem blocks are zeroed, +and whole filesystem blocks are removed from the file. +After a successful call, +subsequent reads from this range will return zeroes. +This option may not be specified at the same time as the +.B \-\-zero\-range +option. +Also, when using this option, +.B \-\-keep\-size +is implied. 
+.sp +Supported for XFS (since Linux 2.6.38), ext4 (since Linux 3.0), +Btrfs (since Linux 3.7) and tmpfs (since Linux 3.5). +.TP +.BR \-v ", " \-\-verbose +Enable verbose mode. +.TP +.BR \-x ", " \-\-posix +Enable POSIX operation mode. +In that mode allocation operation always completes, +but it may take longer when fast allocation is not supported by +the underlying filesystem. +.TP +.BR \-z ", " \-\-zero\-range +Zeroes space in the byte range starting at +.I offset +and continuing for +.I length +bytes. +Within the specified range, blocks are preallocated for the regions +that span the holes in the file. +After a successful call, +subsequent reads from this range will return zeroes. +.sp +Zeroing is done within the filesystem preferably by converting the +range into unwritten extents. This approach means that the specified +range will not be physically zeroed out on the device (except for +partial blocks at either end of the range), and I/O is +(otherwise) required only to update metadata. +.sp +Option \fB\-\-keep\-size\fP can be specified to prevent file length +modification. +.sp +Available since Linux 3.14 for ext4 (only for extent-based files) and XFS. +.TP +.BR \-V ", " \-\-version +Display version information and exit. +.TP +.BR \-h ", " \-\-help +Display help text and exit. .SH AUTHORS -.UR sandeen@redhat.com +.MT sandeen@redhat.com Eric Sandeen -.UE +.ME .br -.UR kzak@redhat.com +.MT kzak@redhat.com Karel Zak -.UE +.ME .SH SEE ALSO +.BR truncate (1), .BR fallocate (2), -.BR posix_fallocate (3), -.BR truncate (1) +.BR posix_fallocate (3) .SH AVAILABILITY The fallocate command is part of the util-linux package and is available from -.UR ftp://\:ftp.kernel.org\:/pub\:/linux\:/utils\:/util-linux/ +.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/ Linux Kernel Archive .UE .
diff --git a/sys-utils/fallocate.c b/sys-utils/fallocate.c index 17ae5fe69..75d89a7a9 100644 --- a/sys-utils/fallocate.c +++ b/sys-utils/fallocate.c @@ -23,6 +23,7 @@ */ #include #include +#include #include #include #include @@ -31,50 +32,110 @@ #include #include #include +#include #ifndef HAVE_FALLOCATE # include #endif -#ifdef HAVE_LINUX_FALLOC_H -# include /* for FALLOC_FL_* flags */ +#if defined(HAVE_LINUX_FALLOC_H) && \ + (!defined(FALLOC_FL_KEEP_SIZE) || !defined(FALLOC_FL_PUNCH_HOLE) || \ + !defined(FALLOC_FL_COLLAPSE_RANGE) || !defined(FALLOC_FL_ZERO_RANGE) || \ + !defined(FALLOC_FL_INSERT_RANGE)) +# include /* non-libc fallback for FALLOC_FL_* flags */ #endif + #ifndef FALLOC_FL_KEEP_SIZE -# define FALLOC_FL_KEEP_SIZE 1 +# define FALLOC_FL_KEEP_SIZE 0x1 #endif #ifndef FALLOC_FL_PUNCH_HOLE -# define FALLOC_FL_PUNCH_HOLE 2 +# define FALLOC_FL_PUNCH_HOLE 0x2 +#endif + +#ifndef FALLOC_FL_COLLAPSE_RANGE +# define FALLOC_FL_COLLAPSE_RANGE 0x8 +#endif + +#ifndef FALLOC_FL_ZERO_RANGE +# define FALLOC_FL_ZERO_RANGE 0x10 +#endif + +#ifndef FALLOC_FL_INSERT_RANGE +# define FALLOC_FL_INSERT_RANGE 0x20 #endif #include "nls.h" #include "strutils.h" #include "c.h" #include "closestream.h" +#include "xalloc.h" #include "optutils.h" -static void __attribute__((__noreturn__)) usage(FILE *out) +static int verbose; +static char *filename; + +static void __attribute__((__noreturn__)) usage(void) { + FILE *out = stdout; fputs(USAGE_HEADER, out); fprintf(out, _(" %s [options] \n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Preallocate space to, or deallocate space from a file.\n"), out); + fputs(USAGE_OPTIONS, out); - fputs(_(" -n, --keep-size don't modify the length of the file\n" - " -p, --punch-hole punch holes in the file\n" - " -o, --offset offset of the allocation, in bytes\n" - " -l, --length length of the allocation, in bytes\n"), out); + fputs(_(" -c, --collapse-range remove a range from the file\n"), out); + fputs(_(" -d, --dig-holes 
detect zeroes and replace with holes\n"), out); + fputs(_(" -i, --insert-range insert a hole at range, shifting existing data\n"), out); + fputs(_(" -l, --length length for range operations, in bytes\n"), out); + fputs(_(" -n, --keep-size maintain the apparent size of the file\n"), out); + fputs(_(" -o, --offset offset for range operations, in bytes\n"), out); + fputs(_(" -p, --punch-hole replace a range with a hole (implies -n)\n"), out); + fputs(_(" -z, --zero-range zero and ensure allocation of a range\n"), out); #ifdef HAVE_POSIX_FALLOCATE - fputs(_(" -x, --posix use posix_fallocate(3) instead of fallocate(2)\n"), out); + fputs(_(" -x, --posix use posix_fallocate(3) instead of fallocate(2)\n"), out); #endif + fputs(_(" -v, --verbose verbose mode\n"), out); + fputs(USAGE_SEPARATOR, out); - fputs(USAGE_HELP, out); - fputs(USAGE_VERSION, out); - fprintf(out, USAGE_MAN_TAIL("fallocate(1)")); + printf(USAGE_HELP_OPTIONS(22)); + + printf(USAGE_MAN_TAIL("fallocate(1)")); - exit(out == stderr ? 
EXIT_FAILURE : EXIT_SUCCESS); + exit(EXIT_SUCCESS); } +static loff_t cvtnum(char *s) +{ + uintmax_t x; + + if (strtosize(s, &x)) + return -1LL; + + return x; +} + +static void xfallocate(int fd, int mode, off_t offset, off_t length) +{ + int error; +#ifdef HAVE_FALLOCATE + error = fallocate(fd, mode, offset, length); +#else + error = syscall(SYS_fallocate, fd, mode, offset, length); +#endif + /* + * EOPNOTSUPP: The FALLOC_FL_KEEP_SIZE is unsupported + * ENOSYS: The filesystem does not support sys_fallocate + */ + if (error < 0) { + if ((mode & FALLOC_FL_KEEP_SIZE) && errno == EOPNOTSUPP) + errx(EXIT_FAILURE, _("fallocate failed: keep size mode is unsupported")); + err(EXIT_FAILURE, _("fallocate failed")); + } +} #ifdef HAVE_POSIX_FALLOCATE static void xposix_fallocate(int fd, off_t offset, off_t length) @@ -86,41 +147,163 @@ static void xposix_fallocate(int fd, off_t offset, off_t length) } #endif +/* The real buffer size has to be bufsize + sizeof(uintptr_t) */ +static int is_nul(void *buf, size_t bufsize) +{ + typedef uintptr_t word; + void const *vp; + char const *cbuf = buf, *cp; + word const *wp = buf; -static loff_t cvtnum(char *s) + /* set sentinel */ + memset((char *) buf + bufsize, '\1', sizeof(word)); + + /* Find first nonzero *word*, or the word with the sentinel. */ + while (*wp++ == 0) + continue; + + /* Find the first nonzero *byte*, or the sentinel. */ + vp = wp - 1; + cp = vp; + + while (*cp++ == 0) + continue; + + return cbuf + bufsize < cp; +} + +static void dig_holes(int fd, off_t file_off, off_t len) { - uintmax_t x; + off_t file_end = len ? file_off + len : 0; + off_t hole_start = 0, hole_sz = 0; + uintmax_t ct = 0; + size_t bufsz; + char *buf; + struct stat st; +#if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE) + off_t cache_start = file_off; + /* + * We don't want to call POSIX_FADV_DONTNEED to discard cached + * data in PAGE_SIZE steps. IMHO it's overkill (too many syscalls). 
+ * + * Let's assume that 1MiB (on system with 4K page size) is just + * a good compromise. + * -- kzak Feb-2014 + */ + const size_t cachesz = getpagesize() * 256; +#endif - if (strtosize(s, &x)) - return -1LL; + if (fstat(fd, &st) != 0) + err(EXIT_FAILURE, _("stat of %s failed"), filename); - return x; + bufsz = st.st_blksize; + + if (lseek(fd, file_off, SEEK_SET) < 0) + err(EXIT_FAILURE, _("seek on %s failed"), filename); + + /* buffer + extra space for is_nul() sentinel */ + buf = xmalloc(bufsz + sizeof(uintptr_t)); + while (file_end == 0 || file_off < file_end) { + /* + * Detect data area (skip holes) + */ + off_t end, off; + + off = lseek(fd, file_off, SEEK_DATA); + if ((off == -1 && errno == ENXIO) || + (file_end && off >= file_end)) + break; + + end = lseek(fd, off, SEEK_HOLE); + if (file_end && end > file_end) + end = file_end; + +#if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE) + posix_fadvise(fd, off, end, POSIX_FADV_SEQUENTIAL); +#endif + /* + * Dig holes in the area + */ + while (off < end) { + ssize_t rsz = pread(fd, buf, bufsz, off); + if (rsz < 0 && errno) + err(EXIT_FAILURE, _("%s: read failed"), filename); + if (end && rsz > 0 && off > end - rsz) + rsz = end - off; + if (rsz <= 0) + break; + + if (is_nul(buf, rsz)) { + if (!hole_sz) /* new hole detected */ + hole_start = off; + hole_sz += rsz; + } else if (hole_sz) { + xfallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, + hole_start, hole_sz); + ct += hole_sz; + hole_sz = hole_start = 0; + } + +#if defined(POSIX_FADV_DONTNEED) && defined(HAVE_POSIX_FADVISE) + /* discard cached data */ + if (off - cache_start > (off_t) cachesz) { + size_t clen = off - cache_start; + + clen = (clen / cachesz) * cachesz; + posix_fadvise(fd, cache_start, clen, POSIX_FADV_DONTNEED); + cache_start = cache_start + clen; + } +#endif + off += rsz; + } + if (hole_sz) { + xfallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, + hole_start, hole_sz); + ct += hole_sz; + } + file_off = off; + } + + 
free(buf); + + if (verbose) { + char *str = size_to_human_string(SIZE_SUFFIX_3LETTER | SIZE_SUFFIX_SPACE, ct); + fprintf(stdout, _("%s: %s (%ju bytes) converted to sparse holes.\n"), + filename, str, ct); + free(str); + } } int main(int argc, char **argv) { - char *fname; int c; - int error = 0; int fd; int mode = 0; - int posix = 0; + int dig = 0; + int posix = 0; loff_t length = -2LL; loff_t offset = 0; static const struct option longopts[] = { - { "help", 0, 0, 'h' }, - { "version", 0, 0, 'V' }, - { "keep-size", 0, 0, 'n' }, - { "punch-hole", 0, 0, 'p' }, - { "offset", 1, 0, 'o' }, - { "length", 1, 0, 'l' }, - { "posix", 0, 0, 'x' }, - { NULL, 0, 0, 0 } + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { "keep-size", no_argument, NULL, 'n' }, + { "punch-hole", no_argument, NULL, 'p' }, + { "collapse-range", no_argument, NULL, 'c' }, + { "dig-holes", no_argument, NULL, 'd' }, + { "insert-range", no_argument, NULL, 'i' }, + { "zero-range", no_argument, NULL, 'z' }, + { "offset", required_argument, NULL, 'o' }, + { "length", required_argument, NULL, 'l' }, + { "posix", no_argument, NULL, 'x' }, + { "verbose", no_argument, NULL, 'v' }, + { NULL, 0, NULL, 0 } }; - static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ - { 'x', 'n', 'p' }, + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'c', 'd', 'p', 'z' }, + { 'c', 'n' }, + { 'x', 'c', 'd', 'i', 'n', 'p', 'z'}, { 0 } }; int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; @@ -130,29 +313,39 @@ int main(int argc, char **argv) textdomain(PACKAGE); atexit(close_stdout); - while ((c = getopt_long(argc, argv, "hVnpl:o:x", longopts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "hvVncpdizxl:o:", longopts, NULL)) + != -1) { err_exclusive_options(c, longopts, excl, excl_st); switch(c) { case 'h': - usage(stdout); + usage(); break; - case 'V': - printf(UTIL_LINUX_VERSION); - return EXIT_SUCCESS; - case 'p': - mode |= FALLOC_FL_PUNCH_HOLE; - /* 
fall through */ - case 'n': - mode |= FALLOC_FL_KEEP_SIZE; + case 'c': + mode |= FALLOC_FL_COLLAPSE_RANGE; + break; + case 'd': + dig = 1; + break; + case 'i': + mode |= FALLOC_FL_INSERT_RANGE; break; case 'l': length = cvtnum(optarg); break; + case 'n': + mode |= FALLOC_FL_KEEP_SIZE; + break; case 'o': offset = cvtnum(optarg); break; + case 'p': + mode |= FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + break; + case 'z': + mode |= FALLOC_FL_ZERO_RANGE; + break; case 'x': #ifdef HAVE_POSIX_FALLOCATE posix = 1; @@ -160,53 +353,59 @@ int main(int argc, char **argv) #else errx(EXIT_FAILURE, _("posix_fallocate support is not compiled")) #endif - default: - usage(stderr); + case 'v': + verbose++; break; + case 'V': + printf(UTIL_LINUX_VERSION); + return EXIT_SUCCESS; + default: + errtryhelp(EXIT_FAILURE); } } - if (length == -2LL) - errx(EXIT_FAILURE, _("no length argument specified")); - if (length <= 0) - errx(EXIT_FAILURE, _("invalid length value specified")); - if (offset < 0) - errx(EXIT_FAILURE, _("invalid offset value specified")); if (optind == argc) - errx(EXIT_FAILURE, _("no filename specified.")); + errx(EXIT_FAILURE, _("no filename specified")); + + filename = argv[optind++]; - fname = argv[optind++]; + if (optind != argc) + errx(EXIT_FAILURE, _("unexpected number of arguments")); - if (optind != argc) { - warnx(_("unexpected number of arguments")); - usage(stderr); + if (dig) { + /* for --dig-holes the default is to analyze the whole file */ + if (length == -2LL) + length = 0; + if (length < 0) + errx(EXIT_FAILURE, _("invalid length value specified")); + } else { + /* it's safer to require the range specification (--length --offset) */ + if (length == -2LL) + errx(EXIT_FAILURE, _("no length argument specified")); + if (length <= 0) + errx(EXIT_FAILURE, _("invalid length value specified")); } + if (offset < 0) + errx(EXIT_FAILURE, _("invalid offset value specified")); - fd = open(fname, O_WRONLY|O_CREAT, 0644); + /* O_CREAT makes sense only for the default fallocate(2)
behavior + * when mode is not specified and new space is allocated + * + * RHEL7.6: for backward compatibility in RHEL7 we keep O_CREAT there. + */ + fd = open(filename, O_RDWR | O_CREAT, + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); if (fd < 0) - err(EXIT_FAILURE, _("cannot open %s"), fname); + err(EXIT_FAILURE, _("cannot open %s"), filename); + if (dig) + dig_holes(fd, offset, length); #ifdef HAVE_POSIX_FALLOCATE - if (posix) + else if (posix) xposix_fallocate(fd, offset, length); - else #endif - -#ifdef HAVE_FALLOCATE - error = fallocate(fd, mode, offset, length); -#else - error = syscall(SYS_fallocate, fd, mode, offset, length); -#endif - /* - * EOPNOTSUPP: The FALLOC_FL_KEEP_SIZE is unsupported - * ENOSYS: The filesystem does not support sys_fallocate - */ - if (error < 0) { - if ((mode & FALLOC_FL_KEEP_SIZE) && errno == EOPNOTSUPP) - errx(EXIT_FAILURE, - _("keep size mode (-n option) unsupported")); - err(EXIT_FAILURE, _("%s: fallocate failed"), fname); - } + else + xfallocate(fd, mode, offset, length); close(fd); return EXIT_SUCCESS; -- 2.14.4