Blob Blame History Raw
From bb5ed3d9ddd7e64b0ea99e231192c0f3f73a3729 Mon Sep 17 00:00:00 2001
From: Milind Changire <mchangir@redhat.com>
Date: Fri, 12 Jun 2015 16:38:43 +0530
Subject: [PATCH 70/73] tools/glusterfind: ignoring deleted files

OSError and IOError exceptions were being thrown if files
were deleted after a session was created and a subsequent
glusterfind pre was attempted. glusterfind now detects
this scenario and safely ignores these changes to the
file-system. We also avoid recording deleted file paths
in the database in the case where gfid to path resolution
cannot be performed for deleted files.

Also, we now turn on the changelog.capture-del-path volume
option to capture delete paths.

Reviewed-on: http://review.gluster.org/#/c/11199/
Reviewed-on: http://review.gluster.org/#/c/11194/
Change-Id: I4637172fd41c8c3e52f38046babd51dbcff03afb
BUG: 1228017
Signed-off-by: Milind Changire <mchangir@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/50626
Reviewed-by: Saravanakumar Arumugam <sarumuga@redhat.com>
Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
Tested-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
---
 tools/glusterfind/src/changelog.py |   60 +++++++++++++++++++++--------------
 tools/glusterfind/src/main.py      |    7 ++++
 2 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
index fd2384b..5939d48 100644
--- a/tools/glusterfind/src/changelog.py
+++ b/tools/glusterfind/src/changelog.py
@@ -61,10 +61,13 @@ def pgfid_to_path(brick, changelog_data):
         if row[0] == "":
             continue
 
-        path = symlink_gfid_to_path(brick, row[0])
-        path = output_path_prepare(path, args.output_prefix)
-
-        changelog_data.gfidpath_set_path1(path, row[0])
+        try:
+            path = symlink_gfid_to_path(brick, row[0])
+            path = output_path_prepare(path, args.output_prefix)
+            changelog_data.gfidpath_set_path1(path, row[0])
+        except (IOError, OSError) as e:
+            logger.warn("Error converting to path: %s" % e)
+            continue
 
     # pgfid2 to path2 in case of RENAME
     for row in changelog_data.gfidpath_get_distinct("pgfid2",
@@ -74,13 +77,14 @@ def pgfid_to_path(brick, changelog_data):
         if row[0] == "":
             continue
 
-        path = symlink_gfid_to_path(brick, row[0])
-        if path == "":
+        try:
+            path = symlink_gfid_to_path(brick, row[0])
+            path = output_path_prepare(path, args.output_prefix)
+            changelog_data.gfidpath_set_path2(path, row[0])
+        except (IOError, OSError) as e:
+            logger.warn("Error converting to path: %s" % e)
             continue
 
-        path = output_path_prepare(path, args.output_prefix)
-        changelog_data.gfidpath_set_path2(path, row[0])
-
 
 def populate_pgfid_and_inodegfid(brick, changelog_data):
     """
@@ -94,14 +98,14 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
         p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
         if os.path.islink(p):
             # It is a Directory if GFID backend path is symlink
-            path = symlink_gfid_to_path(brick, gfid)
-            if path == "":
+            try:
+                path = symlink_gfid_to_path(brick, gfid)
+                path = output_path_prepare(path, args.output_prefix)
+                changelog_data.gfidpath_update({"path1": path},
+                                                {"gfid": gfid})
+            except (IOError, OSError) as e:
+                logger.warn("Error converting to path: %s" % e)
                 continue
-
-            path = output_path_prepare(path, args.output_prefix)
-
-            changelog_data.gfidpath_update({"path1": path},
-                                           {"gfid": gfid})
         else:
             try:
                 # INODE and GFID to inodegfid table
@@ -161,12 +165,16 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):
                    conf.get_opt("brick_ignore_dirs").split(",")]
 
     for row in changelog_data.pgfid_get():
-        path = symlink_gfid_to_path(brick, row[0])
-        find(os.path.join(brick, path),
-             callback_func=output_callback,
-             filter_func=inode_filter,
-             ignore_dirs=ignore_dirs,
-             subdirs_crawl=False)
+        try:
+            path = symlink_gfid_to_path(brick, row[0])
+            find(os.path.join(brick, path),
+                callback_func=output_callback,
+                filter_func=inode_filter,
+                ignore_dirs=ignore_dirs,
+                subdirs_crawl=False)
+        except (IOError, OSError) as e:
+            logger.warn("Error converting to path: %s" % e)
+            continue
 
 
 def gfid_to_path_using_batchfind(brick, changelog_data):
@@ -291,8 +299,12 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
                 # again in list
                 if change.endswith(".%s" % start):
                     continue
-                parse_changelog_to_db(changelog_data, change)
-                libgfchangelog.cl_history_done(change)
+                try:
+                    parse_changelog_to_db(changelog_data, change)
+                    libgfchangelog.cl_history_done(change)
+                except IOError as e:
+                    logger.warn("Error parsing changelog file %s: %s" %
+                        (change, e))
 
             changelog_data.commit()
     except libgfchangelog.ChangelogException as e:
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
index 850024b..5d5058f 100644
--- a/tools/glusterfind/src/main.py
+++ b/tools/glusterfind/src/main.py
@@ -352,6 +352,13 @@ def mode_create(session_dir, args):
         logger.info("Volume option set %s, changelog.changelog on"
                     % args.volume)
 
+        execute(["gluster", "volume", "set",
+                 args.volume, "changelog.capture-del-path", "on"],
+                exit_msg="Failed to set volume option "
+                "changelog.capture-del-path on", logger=logger)
+        logger.info("Volume option set %s, changelog.capture-del-path on"
+                    % args.volume)
+
     # Add Rollover time to current time to make sure changelogs
     # will be available if we use this time as start time
     time_to_update = int(time.time()) + get_changelog_rollover_time(
-- 
1.7.1