|
|
12a457 |
From 70a866b408cff38f60293b729e715aa56789326f Mon Sep 17 00:00:00 2001
|
|
|
12a457 |
From: Aravinda VK <avishwan@redhat.com>
|
|
|
12a457 |
Date: Mon, 21 Mar 2016 16:57:48 +0530
|
|
|
12a457 |
Subject: [PATCH 64/80] tools/glusterfind: Handling Unicode file names
|
|
|
12a457 |
|
|
|
12a457 |
Unicode filenames handled cleanly with this patch. Changelog
|
|
|
12a457 |
files and output files are opened with utf-8 encoding using codecs.open.
|
|
|
12a457 |
|
|
|
12a457 |
urllib.quote_plus and unquote_plus will not handle Unicode, so encode
|
|
|
12a457 |
Unicode to 8-bit string version before calling unquote. urllib.quote_plus
|
|
|
12a457 |
requires 8-bit string itself so do not decode to Unicode if we need to use
|
|
|
12a457 |
quote_plus (when --no-encode=false). Decode to Unicode if --no-encode is set.
|
|
|
12a457 |
|
|
|
12a457 |
BUG: 1309437
|
|
|
12a457 |
Change-Id: If5561c749ab5529445650d322c831eb4da22b65a
|
|
|
12a457 |
Signed-off-by: Aravinda VK <avishwan@redhat.com>
|
|
|
12a457 |
Reviewed-on: http://review.gluster.org/13798
|
|
|
12a457 |
Reviewed-by: Milind Changire <mchangir@redhat.com>
|
|
|
12a457 |
Reviewed-by: Kotresh HR <khiremat@redhat.com>
|
|
|
12a457 |
Reviewed-on: http://review.gluster.org/13856
|
|
|
12a457 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/71831
|
|
|
12a457 |
Reviewed-by: Kotresh Hiremath Ravishankar <khiremat@redhat.com>
|
|
|
12a457 |
---
|
|
|
12a457 |
tools/glusterfind/src/changelog.py | 3 +-
|
|
|
12a457 |
tools/glusterfind/src/changelogdata.py | 65 +++++++++++++-------------------
|
|
|
12a457 |
tools/glusterfind/src/main.py | 12 ++---
|
|
|
12a457 |
tools/glusterfind/src/utils.py | 2 +-
|
|
|
12a457 |
4 files changed, 34 insertions(+), 48 deletions(-)
|
|
|
12a457 |
|
|
|
12a457 |
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
|
|
|
12a457 |
index a58a7eb..283a035 100644
|
|
|
12a457 |
--- a/tools/glusterfind/src/changelog.py
|
|
|
12a457 |
+++ b/tools/glusterfind/src/changelog.py
|
|
|
12a457 |
@@ -17,6 +17,7 @@ import logging
|
|
|
12a457 |
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
|
|
12a457 |
import hashlib
|
|
|
12a457 |
import urllib
|
|
|
12a457 |
+import codecs
|
|
|
12a457 |
|
|
|
12a457 |
import libgfchangelog
|
|
|
12a457 |
from utils import mkdirp, symlink_gfid_to_path
|
|
|
12a457 |
@@ -212,7 +213,7 @@ def parse_changelog_to_db(changelog_data, filename, args):
|
|
|
12a457 |
"""
|
|
|
12a457 |
Parses a Changelog file and populates data in gfidpath table
|
|
|
12a457 |
"""
|
|
|
12a457 |
- with open(filename) as f:
|
|
|
12a457 |
+ with codecs.open(filename, encoding="utf-8") as f:
|
|
|
12a457 |
changelogfile = os.path.basename(filename)
|
|
|
12a457 |
for line in f:
|
|
|
12a457 |
data = line.strip().split(" ")
|
|
|
12a457 |
diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py
|
|
|
12a457 |
index abb8b01..0e32d7b 100644
|
|
|
12a457 |
--- a/tools/glusterfind/src/changelogdata.py
|
|
|
12a457 |
+++ b/tools/glusterfind/src/changelogdata.py
|
|
|
12a457 |
@@ -146,10 +146,7 @@ class ChangelogData(object):
|
|
|
12a457 |
|
|
|
12a457 |
for key, value in filters.items():
|
|
|
12a457 |
query += " AND %s = ?" % key
|
|
|
12a457 |
- if isinstance(value, int):
|
|
|
12a457 |
- params.append(value)
|
|
|
12a457 |
- else:
|
|
|
12a457 |
- params.append(unicode(value, "utf8"))
|
|
|
12a457 |
+ params.append(value)
|
|
|
12a457 |
|
|
|
12a457 |
return self.cursor_reader.execute(query, params)
|
|
|
12a457 |
|
|
|
12a457 |
@@ -161,10 +158,7 @@ class ChangelogData(object):
|
|
|
12a457 |
|
|
|
12a457 |
for key, value in filters.items():
|
|
|
12a457 |
query += " AND %s = ?" % key
|
|
|
12a457 |
- if isinstance(value, int):
|
|
|
12a457 |
- params.append(value)
|
|
|
12a457 |
- else:
|
|
|
12a457 |
- params.append(unicode(value, "utf8"))
|
|
|
12a457 |
+ params.append(value)
|
|
|
12a457 |
|
|
|
12a457 |
return self.cursor_reader.execute(query, params)
|
|
|
12a457 |
|
|
|
12a457 |
@@ -175,10 +169,7 @@ class ChangelogData(object):
|
|
|
12a457 |
|
|
|
12a457 |
for key, value in filters.items():
|
|
|
12a457 |
query += " AND %s = ?" % key
|
|
|
12a457 |
- if isinstance(value, int):
|
|
|
12a457 |
- params.append(value)
|
|
|
12a457 |
- else:
|
|
|
12a457 |
- params.append(unicode(value, "utf8"))
|
|
|
12a457 |
+ params.append(value)
|
|
|
12a457 |
|
|
|
12a457 |
self.cursor.execute(query, params)
|
|
|
12a457 |
|
|
|
12a457 |
@@ -189,10 +180,7 @@ class ChangelogData(object):
|
|
|
12a457 |
params = []
|
|
|
12a457 |
for key, value in data.items():
|
|
|
12a457 |
fields.append(key)
|
|
|
12a457 |
- if isinstance(value, int):
|
|
|
12a457 |
- params.append(value)
|
|
|
12a457 |
- else:
|
|
|
12a457 |
- params.append(unicode(value, "utf8"))
|
|
|
12a457 |
+ params.append(value)
|
|
|
12a457 |
|
|
|
12a457 |
values_substitute = len(fields)*["?"]
|
|
|
12a457 |
query += "%s) VALUES(%s)" % (",".join(fields),
|
|
|
12a457 |
@@ -205,20 +193,14 @@ class ChangelogData(object):
|
|
|
12a457 |
update_fields = []
|
|
|
12a457 |
for key, value in data.items():
|
|
|
12a457 |
update_fields.append("%s = ?" % key)
|
|
|
12a457 |
- if isinstance(value, int):
|
|
|
12a457 |
- params.append(value)
|
|
|
12a457 |
- else:
|
|
|
12a457 |
- params.append(unicode(value, "utf8"))
|
|
|
12a457 |
+ params.append(value)
|
|
|
12a457 |
|
|
|
12a457 |
query = "UPDATE %s SET %s WHERE 1 = 1" % (tablename,
|
|
|
12a457 |
", ".join(update_fields))
|
|
|
12a457 |
|
|
|
12a457 |
for key, value in filters.items():
|
|
|
12a457 |
query += " AND %s = ?" % key
|
|
|
12a457 |
- if isinstance(value, int):
|
|
|
12a457 |
- params.append(value)
|
|
|
12a457 |
- else:
|
|
|
12a457 |
- params.append(unicode(value, "utf8"))
|
|
|
12a457 |
+ params.append(value)
|
|
|
12a457 |
|
|
|
12a457 |
self.cursor.execute(query, params)
|
|
|
12a457 |
|
|
|
12a457 |
@@ -230,12 +212,8 @@ class ChangelogData(object):
|
|
|
12a457 |
params = []
|
|
|
12a457 |
|
|
|
12a457 |
for key, value in filters.items():
|
|
|
12a457 |
- print value
|
|
|
12a457 |
query += " AND %s = ?" % key
|
|
|
12a457 |
- if isinstance(value, int):
|
|
|
12a457 |
- params.append(value)
|
|
|
12a457 |
- else:
|
|
|
12a457 |
- params.append(unicode(value, "utf8"))
|
|
|
12a457 |
+ params.append(value)
|
|
|
12a457 |
|
|
|
12a457 |
self.cursor.execute(query, params)
|
|
|
12a457 |
row = self.cursor.fetchone()
|
|
|
12a457 |
@@ -344,10 +322,15 @@ class ChangelogData(object):
|
|
|
12a457 |
def when_create_mknod_mkdir(self, changelogfile, data):
|
|
|
12a457 |
# E <GFID> <MKNOD|CREATE|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME>
|
|
|
12a457 |
# Add the Entry to DB
|
|
|
12a457 |
- pgfid1, bn1 = urllib.unquote_plus(data[6]).split("/", 1)
|
|
|
12a457 |
+ # urllib.unquote_plus will not handle unicode so, encode Unicode to
|
|
|
12a457 |
+ # represent in 8 bit format and then unquote
|
|
|
12a457 |
+ pgfid1, bn1 = urllib.unquote_plus(
|
|
|
12a457 |
+ data[6].encode("utf-8")).split("/", 1)
|
|
|
12a457 |
|
|
|
12a457 |
if self.args.no_encode:
|
|
|
12a457 |
- bn1 = bn1.strip()
|
|
|
12a457 |
+ # No urlencode since no_encode is set, so convert again to Unicode
|
|
|
12a457 |
+ # format from previously encoded.
|
|
|
12a457 |
+ bn1 = bn1.decode("utf-8").strip()
|
|
|
12a457 |
else:
|
|
|
12a457 |
# Quote again the basename
|
|
|
12a457 |
bn1 = urllib.quote_plus(bn1.strip())
|
|
|
12a457 |
@@ -356,13 +339,15 @@ class ChangelogData(object):
|
|
|
12a457 |
|
|
|
12a457 |
def when_rename(self, changelogfile, data):
|
|
|
12a457 |
# E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME>
|
|
|
12a457 |
- pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1)
|
|
|
12a457 |
- pgfid2, bn2 = urllib.unquote_plus(data[4]).split("/", 1)
|
|
|
12a457 |
+ pgfid1, bn1 = urllib.unquote_plus(
|
|
|
12a457 |
+ data[3].encode("utf-8")).split("/", 1)
|
|
|
12a457 |
+ pgfid2, bn2 = urllib.unquote_plus(
|
|
|
12a457 |
+ data[4].encode("utf-8")).split("/", 1)
|
|
|
12a457 |
|
|
|
12a457 |
if self.args.no_encode:
|
|
|
12a457 |
# Quote again the basename
|
|
|
12a457 |
- bn1 = bn1.strip()
|
|
|
12a457 |
- bn2 = bn2.strip()
|
|
|
12a457 |
+ bn1 = bn1.decode("utf-8").strip()
|
|
|
12a457 |
+ bn2 = bn2.decode("utf-8").strip()
|
|
|
12a457 |
else:
|
|
|
12a457 |
# Quote again the basename
|
|
|
12a457 |
bn1 = urllib.quote_plus(bn1.strip())
|
|
|
12a457 |
@@ -406,10 +391,11 @@ class ChangelogData(object):
|
|
|
12a457 |
def when_link_symlink(self, changelogfile, data):
|
|
|
12a457 |
# E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME>
|
|
|
12a457 |
# Add as New record in Db as Type NEW
|
|
|
12a457 |
- pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1)
|
|
|
12a457 |
+ pgfid1, bn1 = urllib.unquote_plus(
|
|
|
12a457 |
+ data[3].encode("utf-8")).split("/", 1)
|
|
|
12a457 |
if self.args.no_encode:
|
|
|
12a457 |
# Quote again the basename
|
|
|
12a457 |
- bn1 = bn1.strip()
|
|
|
12a457 |
+ bn1 = bn1.decode("utf-8").strip()
|
|
|
12a457 |
else:
|
|
|
12a457 |
# Quote again the basename
|
|
|
12a457 |
bn1 = urllib.quote_plus(bn1.strip())
|
|
|
12a457 |
@@ -424,10 +410,11 @@ class ChangelogData(object):
|
|
|
12a457 |
|
|
|
12a457 |
def when_unlink_rmdir(self, changelogfile, data):
|
|
|
12a457 |
# E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME>
|
|
|
12a457 |
- pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1)
|
|
|
12a457 |
+ pgfid1, bn1 = urllib.unquote_plus(
|
|
|
12a457 |
+ data[3].encode("utf-8")).split("/", 1)
|
|
|
12a457 |
|
|
|
12a457 |
if self.args.no_encode:
|
|
|
12a457 |
- bn1 = bn1.strip()
|
|
|
12a457 |
+ bn1 = bn1.decode("utf-8").strip()
|
|
|
12a457 |
else:
|
|
|
12a457 |
# Quote again the basename
|
|
|
12a457 |
bn1 = urllib.quote_plus(bn1.strip())
|
|
|
12a457 |
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
|
|
|
12a457 |
index 72f11a7..a87fac4 100644
|
|
|
12a457 |
--- a/tools/glusterfind/src/main.py
|
|
|
12a457 |
+++ b/tools/glusterfind/src/main.py
|
|
|
12a457 |
@@ -24,6 +24,7 @@ from utils import setup_logger, human_time, handle_rm_error
|
|
|
12a457 |
from utils import get_changelog_rollover_time, cache_output, create_file
|
|
|
12a457 |
import conf
|
|
|
12a457 |
from changelogdata import OutputMerger
|
|
|
12a457 |
+import codecs
|
|
|
12a457 |
|
|
|
12a457 |
PROG_DESCRIPTION = """
|
|
|
12a457 |
GlusterFS Incremental API
|
|
|
12a457 |
@@ -394,7 +395,7 @@ def enable_volume_options(args):
|
|
|
12a457 |
|
|
|
12a457 |
|
|
|
12a457 |
def write_output(args, outfilemerger):
|
|
|
12a457 |
- with open(args.outfile, "a") as f:
|
|
|
12a457 |
+ with codecs.open(args.outfile, "a", encoding="utf-8") as f:
|
|
|
12a457 |
for row in outfilemerger.get():
|
|
|
12a457 |
# Multiple paths in case of Hardlinks
|
|
|
12a457 |
paths = row[1].split(",")
|
|
|
12a457 |
@@ -409,12 +410,9 @@ def write_output(args, outfilemerger):
|
|
|
12a457 |
if p_rep == row_2_rep:
|
|
|
12a457 |
continue
|
|
|
12a457 |
|
|
|
12a457 |
- p_rep = p_rep.encode('utf8', 'replace')
|
|
|
12a457 |
- row_2_rep = row_2_rep.encode('utf8', 'replace')
|
|
|
12a457 |
-
|
|
|
12a457 |
- f.write("{0} {1} {2}\n".format(row[0],
|
|
|
12a457 |
- p_rep,
|
|
|
12a457 |
- row_2_rep))
|
|
|
12a457 |
+ f.write(u"{0} {1} {2}\n".format(row[0],
|
|
|
12a457 |
+ p_rep,
|
|
|
12a457 |
+ row_2_rep))
|
|
|
12a457 |
|
|
|
12a457 |
|
|
|
12a457 |
def mode_create(session_dir, args):
|
|
|
12a457 |
diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
|
|
|
12a457 |
index b3b0bdf..b05f08e 100644
|
|
|
12a457 |
--- a/tools/glusterfind/src/utils.py
|
|
|
12a457 |
+++ b/tools/glusterfind/src/utils.py
|
|
|
12a457 |
@@ -242,4 +242,4 @@ def output_path_prepare(path, args):
|
|
|
12a457 |
if args.no_encode:
|
|
|
12a457 |
return path
|
|
|
12a457 |
else:
|
|
|
12a457 |
- return urllib.quote_plus(path)
|
|
|
12a457 |
+ return urllib.quote_plus(path.encode("utf-8"))
|
|
|
12a457 |
--
|
|
|
12a457 |
1.7.1
|
|
|
12a457 |
|