cb8e9e
From bb5ed3d9ddd7e64b0ea99e231192c0f3f73a3729 Mon Sep 17 00:00:00 2001
cb8e9e
From: Milind Changire <mchangir@redhat.com>
cb8e9e
Date: Fri, 12 Jun 2015 16:38:43 +0530
cb8e9e
Subject: [PATCH 70/73] tools/glusterfind: ignoring deleted files
cb8e9e
cb8e9e
OSError and IOError exceptions were being thrown if files
cb8e9e
were deleted after a session was created and a subsequent
cb8e9e
glusterfind pre was attempted. glusterfind now detects
cb8e9e
this scenario and safely ignores these changes to the
cb8e9e
file-system. We also avoid recording deleted file paths
cb8e9e
into database in the case where gfid to path resolution
cb8e9e
cannot be performed for deleted files.
cb8e9e
cb8e9e
Also, we now turn on the changelog.capture-del-path volume option to capture deleted paths.
cb8e9e
cb8e9e
Reviewed-on: http://review.gluster.org/#/c/11199/
cb8e9e
Reviewed-on: http://review.gluster.org/#/c/11194/
cb8e9e
Change-Id: I4637172fd41c8c3e52f38046babd51dbcff03afb
cb8e9e
BUG: 1228017
cb8e9e
Signed-off-by: Milind Changire <mchangir@redhat.com>
cb8e9e
Reviewed-on: https://code.engineering.redhat.com/gerrit/50626
cb8e9e
Reviewed-by: Saravanakumar Arumugam <sarumuga@redhat.com>
cb8e9e
Reviewed-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
cb8e9e
Tested-by: Aravinda Vishwanathapura Krishna Murthy <avishwan@redhat.com>
cb8e9e
---
cb8e9e
 tools/glusterfind/src/changelog.py |   60 +++++++++++++++++++++--------------
cb8e9e
 tools/glusterfind/src/main.py      |    7 ++++
cb8e9e
 2 files changed, 43 insertions(+), 24 deletions(-)
cb8e9e
cb8e9e
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
cb8e9e
index fd2384b..5939d48 100644
cb8e9e
--- a/tools/glusterfind/src/changelog.py
cb8e9e
+++ b/tools/glusterfind/src/changelog.py
cb8e9e
@@ -61,10 +61,13 @@ def pgfid_to_path(brick, changelog_data):
cb8e9e
         if row[0] == "":
cb8e9e
             continue
cb8e9e
 
cb8e9e
-        path = symlink_gfid_to_path(brick, row[0])
cb8e9e
-        path = output_path_prepare(path, args.output_prefix)
cb8e9e
-
cb8e9e
-        changelog_data.gfidpath_set_path1(path, row[0])
cb8e9e
+        try:
cb8e9e
+            path = symlink_gfid_to_path(brick, row[0])
cb8e9e
+            path = output_path_prepare(path, args.output_prefix)
cb8e9e
+            changelog_data.gfidpath_set_path1(path, row[0])
cb8e9e
+        except (IOError, OSError) as e:
cb8e9e
+            logger.warn("Error converting to path: %s" % e)
cb8e9e
+            continue
cb8e9e
 
cb8e9e
     # pgfid2 to path2 in case of RENAME
cb8e9e
     for row in changelog_data.gfidpath_get_distinct("pgfid2",
cb8e9e
@@ -74,13 +77,14 @@ def pgfid_to_path(brick, changelog_data):
cb8e9e
         if row[0] == "":
cb8e9e
             continue
cb8e9e
 
cb8e9e
-        path = symlink_gfid_to_path(brick, row[0])
cb8e9e
-        if path == "":
cb8e9e
+        try:
cb8e9e
+            path = symlink_gfid_to_path(brick, row[0])
cb8e9e
+            path = output_path_prepare(path, args.output_prefix)
cb8e9e
+            changelog_data.gfidpath_set_path2(path, row[0])
cb8e9e
+        except (IOError, OSError) as e:
cb8e9e
+            logger.warn("Error converting to path: %s" % e)
cb8e9e
             continue
cb8e9e
 
cb8e9e
-        path = output_path_prepare(path, args.output_prefix)
cb8e9e
-        changelog_data.gfidpath_set_path2(path, row[0])
cb8e9e
-
cb8e9e
 
cb8e9e
 def populate_pgfid_and_inodegfid(brick, changelog_data):
cb8e9e
     """
cb8e9e
@@ -94,14 +98,14 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
cb8e9e
         p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
cb8e9e
         if os.path.islink(p):
cb8e9e
             # It is a Directory if GFID backend path is symlink
cb8e9e
-            path = symlink_gfid_to_path(brick, gfid)
cb8e9e
-            if path == "":
cb8e9e
+            try:
cb8e9e
+                path = symlink_gfid_to_path(brick, gfid)
cb8e9e
+                path = output_path_prepare(path, args.output_prefix)
cb8e9e
+                changelog_data.gfidpath_update({"path1": path},
cb8e9e
+                                                {"gfid": gfid})
cb8e9e
+            except (IOError, OSError) as e:
cb8e9e
+                logger.warn("Error converting to path: %s" % e)
cb8e9e
                 continue
cb8e9e
-
cb8e9e
-            path = output_path_prepare(path, args.output_prefix)
cb8e9e
-
cb8e9e
-            changelog_data.gfidpath_update({"path1": path},
cb8e9e
-                                           {"gfid": gfid})
cb8e9e
         else:
cb8e9e
             try:
cb8e9e
                 # INODE and GFID to inodegfid table
cb8e9e
@@ -161,12 +165,16 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):
cb8e9e
                    conf.get_opt("brick_ignore_dirs").split(",")]
cb8e9e
 
cb8e9e
     for row in changelog_data.pgfid_get():
cb8e9e
-        path = symlink_gfid_to_path(brick, row[0])
cb8e9e
-        find(os.path.join(brick, path),
cb8e9e
-             callback_func=output_callback,
cb8e9e
-             filter_func=inode_filter,
cb8e9e
-             ignore_dirs=ignore_dirs,
cb8e9e
-             subdirs_crawl=False)
cb8e9e
+        try:
cb8e9e
+            path = symlink_gfid_to_path(brick, row[0])
cb8e9e
+            find(os.path.join(brick, path),
cb8e9e
+                callback_func=output_callback,
cb8e9e
+                filter_func=inode_filter,
cb8e9e
+                ignore_dirs=ignore_dirs,
cb8e9e
+                subdirs_crawl=False)
cb8e9e
+        except (IOError, OSError) as e:
cb8e9e
+            logger.warn("Error converting to path: %s" % e)
cb8e9e
+            continue
cb8e9e
 
cb8e9e
 
cb8e9e
 def gfid_to_path_using_batchfind(brick, changelog_data):
cb8e9e
@@ -291,8 +299,12 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
cb8e9e
                 # again in list
cb8e9e
                 if change.endswith(".%s" % start):
cb8e9e
                     continue
cb8e9e
-                parse_changelog_to_db(changelog_data, change)
cb8e9e
-                libgfchangelog.cl_history_done(change)
cb8e9e
+                try:
cb8e9e
+                    parse_changelog_to_db(changelog_data, change)
cb8e9e
+                    libgfchangelog.cl_history_done(change)
cb8e9e
+                except IOError as e:
cb8e9e
+                    logger.warn("Error parsing changelog file %s: %s" %
cb8e9e
+                        (change, e))
cb8e9e
 
cb8e9e
             changelog_data.commit()
cb8e9e
     except libgfchangelog.ChangelogException as e:
cb8e9e
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
cb8e9e
index 850024b..5d5058f 100644
cb8e9e
--- a/tools/glusterfind/src/main.py
cb8e9e
+++ b/tools/glusterfind/src/main.py
cb8e9e
@@ -352,6 +352,13 @@ def mode_create(session_dir, args):
cb8e9e
         logger.info("Volume option set %s, changelog.changelog on"
cb8e9e
                     % args.volume)
cb8e9e
 
cb8e9e
+        execute(["gluster", "volume", "set",
cb8e9e
+                 args.volume, "changelog.capture-del-path", "on"],
cb8e9e
+                exit_msg="Failed to set volume option "
cb8e9e
+                "changelog.capture-del-path on", logger=logger)
cb8e9e
+        logger.info("Volume option set %s, changelog.capture-del-path on"
cb8e9e
+                    % args.volume)
cb8e9e
+
cb8e9e
     # Add Rollover time to current time to make sure changelogs
cb8e9e
     # will be available if we use this time as start time
cb8e9e
     time_to_update = int(time.time()) + get_changelog_rollover_time(
cb8e9e
-- 
cb8e9e
1.7.1
cb8e9e