Blame SOURCES/xfsprogs-5.10.0-xfs-widen-ondisk-inode-timestamps-to-deal-with-y2038.patch

f49185
From e7e3beb95efd751f227a0ced4c83fc5b88582e2e Mon Sep 17 00:00:00 2001
f49185
From: "Darrick J. Wong" <darrick.wong@oracle.com>
f49185
Date: Wed, 11 Nov 2020 20:08:14 -0500
f49185
Subject: [PATCH] xfs: widen ondisk inode timestamps to deal with y2038+
f49185
f49185
Source kernel commit: f93e5436f0ee5a85eaa3a86d2614d215873fb18b
f49185
f49185
Redesign the ondisk inode timestamps to be a simple unsigned 64-bit
f49185
counter of nanoseconds since 14 Dec 1901 (i.e. the minimum time in the
f49185
32-bit unix time epoch).  This enables us to handle dates up to 2486,
f49185
which solves the y2038 problem.
f49185
f49185
sandeen: update xfs_flags2diflags2() as well, to match
f49185
f49185
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
f49185
Reviewed-by: Christoph Hellwig <hch@lst.de>
f49185
Reviewed-by: Gao Xiang <hsiangkao@redhat.com>
f49185
Reviewed-by: Dave Chinner <dchinner@redhat.com>
f49185
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
f49185
---
f49185
f49185
NOTE: xfs_trans_inode.c was not brought over in this version, so code
f49185
landed in trans.c
f49185
f49185
We also do not have the pre-computed geometry, so that needs to be
f49185
explicitly added to libxfs_ialloc rather than inheriting from igeo.
f49185
f49185
diff --git a/include/xfs_inode.h b/include/xfs_inode.h
f49185
index ddd48be..25f2eac 100644
f49185
--- a/include/xfs_inode.h
f49185
+++ b/include/xfs_inode.h
f49185
@@ -146,6 +146,11 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
f49185
 	return ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
f49185
 }
f49185
 
f49185
+static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip)
f49185
+{
f49185
+	return ip->i_d.di_flags2 & XFS_DIFLAG2_BIGTIME;
f49185
+}
f49185
+
f49185
 typedef struct cred {
f49185
 	uid_t	cr_uid;
f49185
 	gid_t	cr_gid;
f49185
diff --git a/libxfs/trans.c b/libxfs/trans.c
f49185
index db90624..54e4dd6 100644
f49185
--- a/libxfs/trans.c
f49185
+++ b/libxfs/trans.c
f49185
@@ -415,6 +415,17 @@ xfs_trans_log_inode(
f49185
 	tp->t_flags |= XFS_TRANS_DIRTY;
f49185
 	set_bit(XFS_LI_DIRTY, &ip->i_itemp->ili_item.li_flags);
f49185
 
f49185
+	/*
f49185
+	 * If we're updating the inode core or the timestamps and it's possible
f49185
+	 * to upgrade this inode to bigtime format, do so now.
f49185
+	 */
f49185
+	if ((flags & (XFS_ILOG_CORE | XFS_ILOG_TIMESTAMP)) &&
f49185
+	    xfs_sb_version_hasbigtime(&ip->i_mount->m_sb) &&
f49185
+	    !xfs_inode_has_bigtime(ip)) {
f49185
+		ip->i_d.di_flags2 |= XFS_DIFLAG2_BIGTIME;
f49185
+		flags |= XFS_ILOG_CORE;
f49185
+	}
f49185
+
f49185
 	/*
f49185
 	 * Always OR in the bits from the ili_last_fields field.
f49185
 	 * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
f49185
diff --git a/libxfs/util.c b/libxfs/util.c
f49185
index 9383bb8..7a8729f 100644
f49185
--- a/libxfs/util.c
f49185
+++ b/libxfs/util.c
f49185
@@ -222,7 +222,8 @@ xfs_flags2diflags2(
f49185
 	unsigned int		xflags)
f49185
 {
f49185
 	uint64_t		di_flags2 =
f49185
-		(ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK);
f49185
+		(ip->i_d.di_flags2 & (XFS_DIFLAG2_REFLINK |
f49185
+				      XFS_DIFLAG2_BIGTIME));
f49185
 
f49185
 	if (xflags & FS_XFLAG_DAX)
f49185
 		di_flags2 |= XFS_DIFLAG2_DAX;
f49185
@@ -317,8 +318,14 @@ libxfs_ialloc(
f49185
 		ASSERT(ip->i_d.di_ino == ino);
f49185
 		ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
f49185
 		VFS_I(ip)->i_version = 1;
f49185
-		ip->i_d.di_flags2 = pip ? 0 : xfs_flags2diflags2(ip,
f49185
-				fsx->fsx_xflags);
f49185
+		if (pip) {
f49185
+			ip->i_d.di_flags2 = 0;
f49185
+			if (xfs_sb_version_hasbigtime(&ip->i_mount->m_sb))
f49185
+				ip->i_d.di_flags2 |= XFS_DIFLAG2_BIGTIME;
f49185
+		} else {
f49185
+			ip->i_d.di_flags2 = xfs_flags2diflags2(ip, fsx->fsx_xflags);
f49185
+		}
f49185
+
f49185
 		ip->i_d.di_crtime.tv_sec = (int32_t)VFS_I(ip)->i_mtime.tv_sec;
f49185
 		ip->i_d.di_crtime.tv_nsec = (int32_t)VFS_I(ip)->i_mtime.tv_nsec;
f49185
 		ip->i_d.di_cowextsize = pip ? 0 : fsx->fsx_cowextsize;
f49185
diff --git a/libxfs/xfs_format.h b/libxfs/xfs_format.h
f49185
index 371f5cd..b1f6219 100644
f49185
--- a/libxfs/xfs_format.h
f49185
+++ b/libxfs/xfs_format.h
f49185
@@ -466,6 +466,7 @@ xfs_sb_has_ro_compat_feature(
f49185
 #define XFS_SB_FEAT_INCOMPAT_FTYPE	(1 << 0)	/* filetype in dirent */
f49185
 #define XFS_SB_FEAT_INCOMPAT_SPINODES	(1 << 1)	/* sparse inode chunks */
f49185
 #define XFS_SB_FEAT_INCOMPAT_META_UUID	(1 << 2)	/* metadata UUID */
f49185
+#define XFS_SB_FEAT_INCOMPAT_BIGTIME	(1 << 3)	/* large timestamps */
f49185
 #define XFS_SB_FEAT_INCOMPAT_ALL \
f49185
 		(XFS_SB_FEAT_INCOMPAT_FTYPE|	\
f49185
 		 XFS_SB_FEAT_INCOMPAT_SPINODES|	\
f49185
@@ -580,6 +581,12 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
f49185
 #define	XFS_FSB_TO_DADDR(mp,fsbno)	XFS_AGB_TO_DADDR(mp, \
f49185
 			XFS_FSB_TO_AGNO(mp,fsbno), XFS_FSB_TO_AGBNO(mp,fsbno))
f49185
 
f49185
+static inline bool xfs_sb_version_hasbigtime(struct xfs_sb *sbp)
f49185
+{
f49185
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
f49185
+		(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME);
f49185
+}
f49185
+
f49185
 /*
f49185
  * File system sector to basic block conversions.
f49185
  */
f49185
@@ -849,6 +856,13 @@ typedef struct xfs_agfl {
f49185
  * Therefore, the ondisk min and max defined here can be used directly to
f49185
  * constrain the incore timestamps on a Unix system.  Note that we actually
f49185
  * encode a __be64 value on disk.
f49185
+ *
f49185
+ * When the bigtime feature is enabled, ondisk inode timestamps become an
f49185
+ * unsigned 64-bit nanoseconds counter.  This means that the bigtime inode
f49185
+ * timestamp epoch is the start of the classic timestamp range, which is
f49185
+ * Dec 13 20:45:52 UTC 1901.  Because the epochs are not the same, callers
f49185
+ * /must/ use the bigtime conversion functions when encoding and decoding raw
f49185
+ * timestamps.
f49185
  */
f49185
 typedef __be64 xfs_timestamp_t;
f49185
 
f49185
@@ -870,6 +884,50 @@ struct xfs_legacy_timestamp {
f49185
  */
f49185
 #define XFS_LEGACY_TIME_MAX	((int64_t)S32_MAX)
f49185
 
f49185
+/*
f49185
+ * Smallest possible ondisk seconds value with bigtime timestamps.  This
f49185
+ * corresponds (after conversion to a Unix timestamp) with the traditional
f49185
+ * minimum timestamp of Dec 13 20:45:52 UTC 1901.
f49185
+ */
f49185
+#define XFS_BIGTIME_TIME_MIN	((int64_t)0)
f49185
+
f49185
+/*
f49185
+ * Largest supported ondisk seconds value with bigtime timestamps.  This
f49185
+ * corresponds (after conversion to a Unix timestamp) with an incore timestamp
f49185
+ * of Jul  2 20:20:24 UTC 2486.
f49185
+ *
f49185
+ * We round down the ondisk limit so that the bigtime quota and inode max
f49185
+ * timestamps will be the same.
f49185
+ */
f49185
+#define XFS_BIGTIME_TIME_MAX	((int64_t)((-1ULL / NSEC_PER_SEC) & ~0x3ULL))
f49185
+
f49185
+/*
f49185
+ * Bigtime epoch is set exactly to the minimum time value that a traditional
f49185
+ * 32-bit timestamp can represent when using the Unix epoch as a reference.
f49185
+ * Hence the Unix epoch is at a fixed offset into the supported bigtime
f49185
+ * timestamp range.
f49185
+ *
f49185
+ * The bigtime epoch also matches the minimum value an on-disk 32-bit XFS
f49185
+ * timestamp can represent so we will not lose any fidelity in converting
f49185
+ * to/from unix and bigtime timestamps.
f49185
+ *
f49185
+ * The following conversion factor converts a seconds counter from the Unix
f49185
+ * epoch to the bigtime epoch.
f49185
+ */
f49185
+#define XFS_BIGTIME_EPOCH_OFFSET	(-(int64_t)S32_MIN)
f49185
+
f49185
+/* Convert a timestamp from the Unix epoch to the bigtime epoch. */
f49185
+static inline uint64_t xfs_unix_to_bigtime(time64_t unix_seconds)
f49185
+{
f49185
+	return (uint64_t)unix_seconds + XFS_BIGTIME_EPOCH_OFFSET;
f49185
+}
f49185
+
f49185
+/* Convert a timestamp from the bigtime epoch to the Unix epoch. */
f49185
+static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds)
f49185
+{
f49185
+	return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET;
f49185
+}
f49185
+
f49185
 /*
f49185
  * On-disk inode structure.
f49185
  *
f49185
@@ -1096,12 +1154,22 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
f49185
 #define XFS_DIFLAG2_DAX_BIT	0	/* use DAX for this inode */
f49185
 #define XFS_DIFLAG2_REFLINK_BIT	1	/* file's blocks may be shared */
f49185
 #define XFS_DIFLAG2_COWEXTSIZE_BIT   2  /* copy on write extent size hint */
f49185
+#define XFS_DIFLAG2_BIGTIME_BIT	3	/* big timestamps */
f49185
+
f49185
 #define XFS_DIFLAG2_DAX		(1 << XFS_DIFLAG2_DAX_BIT)
f49185
 #define XFS_DIFLAG2_REFLINK     (1 << XFS_DIFLAG2_REFLINK_BIT)
f49185
 #define XFS_DIFLAG2_COWEXTSIZE  (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
f49185
+#define XFS_DIFLAG2_BIGTIME	(1 << XFS_DIFLAG2_BIGTIME_BIT)
f49185
 
f49185
 #define XFS_DIFLAG2_ANY \
f49185
-	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE)
f49185
+	(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
f49185
+	 XFS_DIFLAG2_BIGTIME)
f49185
+
f49185
+static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
f49185
+{
f49185
+	return dip->di_version >= 3 &&
f49185
+	       (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_BIGTIME));
f49185
+}
f49185
 
f49185
 /*
f49185
  * Inode number format:
f49185
diff --git a/libxfs/xfs_fs.h b/libxfs/xfs_fs.h
f49185
index 4fa9852..714dba1 100644
f49185
--- a/libxfs/xfs_fs.h
f49185
+++ b/libxfs/xfs_fs.h
f49185
@@ -231,6 +231,7 @@ typedef struct xfs_fsop_resblks {
f49185
 #define XFS_FSOP_GEOM_FLAGS_SPINODES	0x40000	/* sparse inode chunks	*/
f49185
 #define XFS_FSOP_GEOM_FLAGS_RMAPBT	0x80000	/* reverse mapping btree */
f49185
 #define XFS_FSOP_GEOM_FLAGS_REFLINK	0x100000 /* files can share blocks */
f49185
+#define XFS_FSOP_GEOM_FLAGS_BIGTIME	0x200000 /* 64-bit nsec timestamps */
f49185
 
f49185
 /*
f49185
  * Minimum and maximum sizes need for growth checks.
f49185
diff --git a/libxfs/xfs_inode_buf.c b/libxfs/xfs_inode_buf.c
f49185
index d8831a1..8cd16bf 100644
f49185
--- a/libxfs/xfs_inode_buf.c
f49185
+++ b/libxfs/xfs_inode_buf.c
f49185
@@ -195,14 +195,29 @@ xfs_imap_to_bp(
f49185
 	return 0;
f49185
 }
f49185
 
f49185
+static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts)
f49185
+{
f49185
+	struct timespec64	tv;
f49185
+	uint32_t		n;	/* filled by div_u64_rem(); becomes tv_nsec */
f49185
+
f49185
+	tv.tv_sec = xfs_bigtime_to_unix(div_u64_rem(ts, NSEC_PER_SEC, &n));
f49185
+	tv.tv_nsec = n;
f49185
+
f49185
+	return tv;
f49185
+}
f49185
+
f49185
 /* Convert an ondisk timestamp to an incore timestamp. */
f49185
 struct timespec64
f49185
 xfs_inode_from_disk_ts(
f49185
+	struct xfs_dinode		*dip,
f49185
 	const xfs_timestamp_t		ts)
f49185
 {
f49185
 	struct timespec64		tv;
f49185
 	struct xfs_legacy_timestamp	*lts;
f49185
 
f49185
+	if (xfs_dinode_has_bigtime(dip))
f49185
+		return xfs_inode_decode_bigtime(be64_to_cpu(ts));
f49185
+
f49185
 	lts = (struct xfs_legacy_timestamp *)&ts;
f49185
 	tv.tv_sec = (int)be32_to_cpu(lts->t_sec);
f49185
 	tv.tv_nsec = (int)be32_to_cpu(lts->t_nsec);
f49185
@@ -246,9 +261,9 @@ xfs_inode_from_disk(
f49185
 	 * a time before epoch is converted to a time long after epoch
f49185
 	 * on 64 bit systems.
f49185
 	 */
f49185
-	inode->i_atime = xfs_inode_from_disk_ts(from->di_atime);
f49185
-	inode->i_mtime = xfs_inode_from_disk_ts(from->di_mtime);
f49185
-	inode->i_ctime = xfs_inode_from_disk_ts(from->di_ctime);
f49185
+	inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime);
f49185
+	inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime);
f49185
+	inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime);
f49185
 
f49185
 	inode->i_generation = be32_to_cpu(from->di_gen);
f49185
 	inode->i_mode = be16_to_cpu(from->di_mode);
f49185
@@ -267,7 +282,7 @@ xfs_inode_from_disk(
f49185
 	if (to->di_version == 3) {
f49185
 		inode_set_iversion_queried(inode,
f49185
 					   be64_to_cpu(from->di_changecount));
f49185
-		to->di_crtime = xfs_inode_from_disk_ts(from->di_crtime);
f49185
+		to->di_crtime = xfs_inode_from_disk_ts(from, from->di_crtime);
f49185
 		to->di_flags2 = be64_to_cpu(from->di_flags2);
f49185
 		to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
f49185
 	}
f49185
@@ -276,11 +291,15 @@ xfs_inode_from_disk(
f49185
 /* Convert an incore timestamp to an ondisk timestamp. */
f49185
 static inline xfs_timestamp_t
f49185
 xfs_inode_to_disk_ts(
f49185
+	struct xfs_inode		*ip,
f49185
 	const struct timespec64		tv)
f49185
 {
f49185
 	struct xfs_legacy_timestamp	*lts;
f49185
 	xfs_timestamp_t			ts;
f49185
 
f49185
+	if (xfs_inode_has_bigtime(ip))
f49185
+		return cpu_to_be64(xfs_inode_encode_bigtime(tv));
f49185
+
f49185
 	lts = (struct xfs_legacy_timestamp *)&ts;
f49185
 	lts->t_sec = cpu_to_be32(tv.tv_sec);
f49185
 	lts->t_nsec = cpu_to_be32(tv.tv_nsec);
f49185
@@ -308,9 +327,9 @@ xfs_inode_to_disk(
f49185
 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
f49185
 
f49185
 	memset(to->di_pad, 0, sizeof(to->di_pad));
f49185
-	to->di_atime = xfs_inode_to_disk_ts(inode->i_atime);
f49185
-	to->di_mtime = xfs_inode_to_disk_ts(inode->i_mtime);
f49185
-	to->di_ctime = xfs_inode_to_disk_ts(inode->i_ctime);
f49185
+	to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
f49185
+	to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
f49185
+	to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
f49185
 	to->di_nlink = cpu_to_be32(inode->i_nlink);
f49185
 	to->di_gen = cpu_to_be32(inode->i_generation);
f49185
 	to->di_mode = cpu_to_be16(inode->i_mode);
f49185
@@ -328,7 +347,7 @@ xfs_inode_to_disk(
f49185
 
f49185
 	if (from->di_version == 3) {
f49185
 		to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
f49185
-		to->di_crtime = xfs_inode_to_disk_ts(from->di_crtime);
f49185
+		to->di_crtime = xfs_inode_to_disk_ts(ip, from->di_crtime);
f49185
 		to->di_flags2 = cpu_to_be64(from->di_flags2);
f49185
 		to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
f49185
 		to->di_ino = cpu_to_be64(ip->i_ino);
f49185
@@ -547,6 +566,11 @@ xfs_dinode_verify(
f49185
 	if (fa)
f49185
 		return fa;
f49185
 
f49185
+	/* bigtime iflag can only happen on bigtime filesystems */
f49185
+	if (xfs_dinode_has_bigtime(dip) &&
f49185
+	    !xfs_sb_version_hasbigtime(&mp->m_sb))
f49185
+		return __this_address;
f49185
+
f49185
 	return NULL;
f49185
 }
f49185
 
f49185
diff --git a/libxfs/xfs_inode_buf.h b/libxfs/xfs_inode_buf.h
f49185
index 6147f42..2b91e60 100644
f49185
--- a/libxfs/xfs_inode_buf.h
f49185
+++ b/libxfs/xfs_inode_buf.h
f49185
@@ -40,6 +40,11 @@ struct xfs_icdinode {
f49185
 	struct timespec64 di_crtime;	/* time created */
f49185
 };
f49185
 
f49185
+static inline bool xfs_icdinode_has_bigtime(const struct xfs_icdinode *icd)
f49185
+{
f49185
+	return icd->di_flags2 & XFS_DIFLAG2_BIGTIME;
f49185
+}
f49185
+
f49185
 /*
f49185
  * Inode location information.  Stored in the inode and passed to
f49185
  * xfs_imap_to_bp() to get a buffer and dinode for a given inode.
f49185
@@ -76,6 +81,12 @@ xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
f49185
 		uint32_t cowextsize, uint16_t mode, uint16_t flags,
f49185
 		uint64_t flags2);
f49185
 
f49185
-struct timespec64 xfs_inode_from_disk_ts(const xfs_timestamp_t ts);
f49185
+static inline uint64_t xfs_inode_encode_bigtime(struct timespec64 tv)
f49185
+{
f49185
+	return xfs_unix_to_bigtime(tv.tv_sec) * NSEC_PER_SEC + tv.tv_nsec;
f49185
+}
f49185
+
f49185
+struct timespec64 xfs_inode_from_disk_ts(struct xfs_dinode *dip,
f49185
+		const xfs_timestamp_t ts);
f49185
 
f49185
 #endif	/* __XFS_INODE_BUF_H__ */
f49185
diff --git a/libxfs/xfs_sb.c b/libxfs/xfs_sb.c
f49185
index cee77a6..d11545b 100644
f49185
--- a/libxfs/xfs_sb.c
f49185
+++ b/libxfs/xfs_sb.c
f49185
@@ -1124,6 +1124,8 @@ xfs_fs_geometry(
f49185
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT;
f49185
 	if (xfs_sb_version_hasreflink(sbp))
f49185
 		geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK;
f49185
+	if (xfs_sb_version_hasbigtime(sbp))
f49185
+		geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME;
f49185
 	if (xfs_sb_version_hassector(sbp))
f49185
 		geo->logsectsize = sbp->sb_logsectsize;
f49185
 	else