7f4c2a
From bb5ed3d9ddd7e64b0ea99e231192c0f3f73a3729 Mon Sep 17 00:00:00 2001
7f4c2a
From: Milind Changire <mchangir@redhat.com>
7f4c2a
Date: Fri, 12 Jun 2015 16:38:43 +0530
7f4c2a
Subject: [PATCH 70/73] tools/glusterfind: ignoring deleted files
7f4c2a
7f4c2a
OSError and IOError exceptions were being thrown if files
7f4c2a
were deleted after the session was created and a subsequent
7f4c2a
glusterfind pre was attempted. glusterfind now detects
7f4c2a
this scenario and safely ignores these changes to the
7f4c2a
file-system. We also avoid recording deleted file paths
7f4c2a
into database in the case where gfid to path resolution
7f4c2a
cannot be performed for deleted files.
7f4c2a
7f4c2a
Also, we now turn on the changelog.capture-del-path volume option to capture delete paths.
7f4c2a
7f4c2a
Reviewed-on: http://review.gluster.org/#/c/11199/
7f4c2a
Reviewed-on: http://review.gluster.org/#/c/11194/
7f4c2a
Change-Id: I4637172fd41c8c3e52f38046babd51dbcff03afb
7f4c2a
BUG: 1228017
7f4c2a
Signed-off-by: Milind Changire <mchangir@redhat.com>
7f4c2a
Reviewed-on: https://code.engineering.redhat.com/gerrit/50626
7f4c2a
Reviewed-by: Saravanakumar Arumugam <sarumuga@redhat.com>
7f4c2a
Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
7f4c2a
Tested-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
7f4c2a
---
7f4c2a
 tools/glusterfind/src/changelog.py |   60 +++++++++++++++++++++--------------
7f4c2a
 tools/glusterfind/src/main.py      |    7 ++++
7f4c2a
 2 files changed, 43 insertions(+), 24 deletions(-)
7f4c2a
7f4c2a
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
7f4c2a
index fd2384b..5939d48 100644
7f4c2a
--- a/tools/glusterfind/src/changelog.py
7f4c2a
+++ b/tools/glusterfind/src/changelog.py
7f4c2a
@@ -61,10 +61,13 @@ def pgfid_to_path(brick, changelog_data):
7f4c2a
         if row[0] == "":
7f4c2a
             continue
7f4c2a
 
7f4c2a
-        path = symlink_gfid_to_path(brick, row[0])
7f4c2a
-        path = output_path_prepare(path, args.output_prefix)
7f4c2a
-
7f4c2a
-        changelog_data.gfidpath_set_path1(path, row[0])
7f4c2a
+        try:
7f4c2a
+            path = symlink_gfid_to_path(brick, row[0])
7f4c2a
+            path = output_path_prepare(path, args.output_prefix)
7f4c2a
+            changelog_data.gfidpath_set_path1(path, row[0])
7f4c2a
+        except (IOError, OSError) as e:
7f4c2a
+            logger.warn("Error converting to path: %s" % e)
7f4c2a
+            continue
7f4c2a
 
7f4c2a
     # pgfid2 to path2 in case of RENAME
7f4c2a
     for row in changelog_data.gfidpath_get_distinct("pgfid2",
7f4c2a
@@ -74,13 +77,14 @@ def pgfid_to_path(brick, changelog_data):
7f4c2a
         if row[0] == "":
7f4c2a
             continue
7f4c2a
 
7f4c2a
-        path = symlink_gfid_to_path(brick, row[0])
7f4c2a
-        if path == "":
7f4c2a
+        try:
7f4c2a
+            path = symlink_gfid_to_path(brick, row[0])
7f4c2a
+            path = output_path_prepare(path, args.output_prefix)
7f4c2a
+            changelog_data.gfidpath_set_path2(path, row[0])
7f4c2a
+        except (IOError, OSError) as e:
7f4c2a
+            logger.warn("Error converting to path: %s" % e)
7f4c2a
             continue
7f4c2a
 
7f4c2a
-        path = output_path_prepare(path, args.output_prefix)
7f4c2a
-        changelog_data.gfidpath_set_path2(path, row[0])
7f4c2a
-
7f4c2a
 
7f4c2a
 def populate_pgfid_and_inodegfid(brick, changelog_data):
7f4c2a
     """
7f4c2a
@@ -94,14 +98,14 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
7f4c2a
         p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
7f4c2a
         if os.path.islink(p):
7f4c2a
             # It is a Directory if GFID backend path is symlink
7f4c2a
-            path = symlink_gfid_to_path(brick, gfid)
7f4c2a
-            if path == "":
7f4c2a
+            try:
7f4c2a
+                path = symlink_gfid_to_path(brick, gfid)
7f4c2a
+                path = output_path_prepare(path, args.output_prefix)
7f4c2a
+                changelog_data.gfidpath_update({"path1": path},
7f4c2a
+                                                {"gfid": gfid})
7f4c2a
+            except (IOError, OSError) as e:
7f4c2a
+                logger.warn("Error converting to path: %s" % e)
7f4c2a
                 continue
7f4c2a
-
7f4c2a
-            path = output_path_prepare(path, args.output_prefix)
7f4c2a
-
7f4c2a
-            changelog_data.gfidpath_update({"path1": path},
7f4c2a
-                                           {"gfid": gfid})
7f4c2a
         else:
7f4c2a
             try:
7f4c2a
                 # INODE and GFID to inodegfid table
7f4c2a
@@ -161,12 +165,16 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):
7f4c2a
                    conf.get_opt("brick_ignore_dirs").split(",")]
7f4c2a
 
7f4c2a
     for row in changelog_data.pgfid_get():
7f4c2a
-        path = symlink_gfid_to_path(brick, row[0])
7f4c2a
-        find(os.path.join(brick, path),
7f4c2a
-             callback_func=output_callback,
7f4c2a
-             filter_func=inode_filter,
7f4c2a
-             ignore_dirs=ignore_dirs,
7f4c2a
-             subdirs_crawl=False)
7f4c2a
+        try:
7f4c2a
+            path = symlink_gfid_to_path(brick, row[0])
7f4c2a
+            find(os.path.join(brick, path),
7f4c2a
+                callback_func=output_callback,
7f4c2a
+                filter_func=inode_filter,
7f4c2a
+                ignore_dirs=ignore_dirs,
7f4c2a
+                subdirs_crawl=False)
7f4c2a
+        except (IOError, OSError) as e:
7f4c2a
+            logger.warn("Error converting to path: %s" % e)
7f4c2a
+            continue
7f4c2a
 
7f4c2a
 
7f4c2a
 def gfid_to_path_using_batchfind(brick, changelog_data):
7f4c2a
@@ -291,8 +299,12 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
7f4c2a
                 # again in list
7f4c2a
                 if change.endswith(".%s" % start):
7f4c2a
                     continue
7f4c2a
-                parse_changelog_to_db(changelog_data, change)
7f4c2a
-                libgfchangelog.cl_history_done(change)
7f4c2a
+                try:
7f4c2a
+                    parse_changelog_to_db(changelog_data, change)
7f4c2a
+                    libgfchangelog.cl_history_done(change)
7f4c2a
+                except IOError as e:
7f4c2a
+                    logger.warn("Error parsing changelog file %s: %s" %
7f4c2a
+                        (change, e))
7f4c2a
 
7f4c2a
             changelog_data.commit()
7f4c2a
     except libgfchangelog.ChangelogException as e:
7f4c2a
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
7f4c2a
index 850024b..5d5058f 100644
7f4c2a
--- a/tools/glusterfind/src/main.py
7f4c2a
+++ b/tools/glusterfind/src/main.py
7f4c2a
@@ -352,6 +352,13 @@ def mode_create(session_dir, args):
7f4c2a
         logger.info("Volume option set %s, changelog.changelog on"
7f4c2a
                     % args.volume)
7f4c2a
 
7f4c2a
+        execute(["gluster", "volume", "set",
7f4c2a
+                 args.volume, "changelog.capture-del-path", "on"],
7f4c2a
+                exit_msg="Failed to set volume option "
7f4c2a
+                "changelog.capture-del-path on", logger=logger)
7f4c2a
+        logger.info("Volume option set %s, changelog.capture-del-path on"
7f4c2a
+                    % args.volume)
7f4c2a
+
7f4c2a
     # Add Rollover time to current time to make sure changelogs
7f4c2a
     # will be available if we use this time as start time
7f4c2a
     time_to_update = int(time.time()) + get_changelog_rollover_time(
7f4c2a
-- 
7f4c2a
1.7.1
7f4c2a