Tree - rpms/glusterfs - CentOS Git server

rpms / glusterfs

Blame SOURCES/0064-tools-glusterfind-Handling-Unicode-file-names.patch

Blob History Raw

		12a457	`From 70a866b408cff38f60293b729e715aa56789326f Mon Sep 17 00:00:00 2001`
		12a457	`From: Aravinda VK <avishwan@redhat.com>`
		12a457	`Date: Mon, 21 Mar 2016 16:57:48 +0530`
		12a457	`Subject: [PATCH 64/80] tools/glusterfind: Handling Unicode file names`
		12a457
		12a457	`Unicode filenames handled cleanly with this patch. Changelog`
		12a457	`files and output files are opened with utf-8 encoding using codecs.open.`
		12a457
		12a457	`urllib.quote_plus and unquote_plus do not handle Unicode, so encode`
		12a457	`Unicode to 8-bit string version before calling unquote. urllib.quote_plus`
		12a457	`requires 8-bit string itself so do not decode to Unicode if we need to use`
		12a457	`quote_plus (when --no-encode=false). Decode to Unicode if --no-encode is set.`
		12a457
		12a457	`BUG: 1309437`
		12a457	`Change-Id: If5561c749ab5529445650d322c831eb4da22b65a`
		12a457	`Signed-off-by: Aravinda VK <avishwan@redhat.com>`
		12a457	`Reviewed-on: http://review.gluster.org/13798`
		12a457	`Reviewed-by: Milind Changire <mchangir@redhat.com>`
		12a457	`Reviewed-by: Kotresh HR <khiremat@redhat.com>`
		12a457	`Reviewed-on: http://review.gluster.org/13856`
		12a457	`Reviewed-on: https://code.engineering.redhat.com/gerrit/71831`
		12a457	`Reviewed-by: Kotresh Hiremath Ravishankar <khiremat@redhat.com>`
		12a457	`---`
		12a457	`tools/glusterfind/src/changelog.py \| 3 +-`
		12a457	`tools/glusterfind/src/changelogdata.py \| 65 +++++++++++++-------------------`
		12a457	`tools/glusterfind/src/main.py \| 12 ++---`
		12a457	`tools/glusterfind/src/utils.py \| 2 +-`
		12a457	`4 files changed, 34 insertions(+), 48 deletions(-)`
		12a457
		12a457	`diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py`
		12a457	`index a58a7eb..283a035 100644`
		12a457	`--- a/tools/glusterfind/src/changelog.py`
		12a457	`+++ b/tools/glusterfind/src/changelog.py`
		12a457	`@@ -17,6 +17,7 @@ import logging`
		12a457	`from argparse import ArgumentParser, RawDescriptionHelpFormatter`
		12a457	`import hashlib`
		12a457	`import urllib`
		12a457	`+import codecs`
		12a457
		12a457	`import libgfchangelog`
		12a457	`from utils import mkdirp, symlink_gfid_to_path`
		12a457	`@@ -212,7 +213,7 @@ def parse_changelog_to_db(changelog_data, filename, args):`
		12a457	`"""`
		12a457	`Parses a Changelog file and populates data in gfidpath table`
		12a457	`"""`
		12a457	`- with open(filename) as f:`
		12a457	`+ with codecs.open(filename, encoding="utf-8") as f:`
		12a457	`changelogfile = os.path.basename(filename)`
		12a457	`for line in f:`
		12a457	`data = line.strip().split(" ")`
		12a457	`diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py`
		12a457	`index abb8b01..0e32d7b 100644`
		12a457	`--- a/tools/glusterfind/src/changelogdata.py`
		12a457	`+++ b/tools/glusterfind/src/changelogdata.py`
		12a457	`@@ -146,10 +146,7 @@ class ChangelogData(object):`
		12a457
		12a457	`for key, value in filters.items():`
		12a457	`query += " AND %s = ?" % key`
		12a457	`- if isinstance(value, int):`
		12a457	`- params.append(value)`
		12a457	`- else:`
		12a457	`- params.append(unicode(value, "utf8"))`
		12a457	`+ params.append(value)`
		12a457
		12a457	`return self.cursor_reader.execute(query, params)`
		12a457
		12a457	`@@ -161,10 +158,7 @@ class ChangelogData(object):`
		12a457
		12a457	`for key, value in filters.items():`
		12a457	`query += " AND %s = ?" % key`
		12a457	`- if isinstance(value, int):`
		12a457	`- params.append(value)`
		12a457	`- else:`
		12a457	`- params.append(unicode(value, "utf8"))`
		12a457	`+ params.append(value)`
		12a457
		12a457	`return self.cursor_reader.execute(query, params)`
		12a457
		12a457	`@@ -175,10 +169,7 @@ class ChangelogData(object):`
		12a457
		12a457	`for key, value in filters.items():`
		12a457	`query += " AND %s = ?" % key`
		12a457	`- if isinstance(value, int):`
		12a457	`- params.append(value)`
		12a457	`- else:`
		12a457	`- params.append(unicode(value, "utf8"))`
		12a457	`+ params.append(value)`
		12a457
		12a457	`self.cursor.execute(query, params)`
		12a457
		12a457	`@@ -189,10 +180,7 @@ class ChangelogData(object):`
		12a457	`params = []`
		12a457	`for key, value in data.items():`
		12a457	`fields.append(key)`
		12a457	`- if isinstance(value, int):`
		12a457	`- params.append(value)`
		12a457	`- else:`
		12a457	`- params.append(unicode(value, "utf8"))`
		12a457	`+ params.append(value)`
		12a457
		12a457	`values_substitute = len(fields)*["?"]`
		12a457	`query += "%s) VALUES(%s)" % (",".join(fields),`
		12a457	`@@ -205,20 +193,14 @@ class ChangelogData(object):`
		12a457	`update_fields = []`
		12a457	`for key, value in data.items():`
		12a457	`update_fields.append("%s = ?" % key)`
		12a457	`- if isinstance(value, int):`
		12a457	`- params.append(value)`
		12a457	`- else:`
		12a457	`- params.append(unicode(value, "utf8"))`
		12a457	`+ params.append(value)`
		12a457
		12a457	`query = "UPDATE %s SET %s WHERE 1 = 1" % (tablename,`
		12a457	`", ".join(update_fields))`
		12a457
		12a457	`for key, value in filters.items():`
		12a457	`query += " AND %s = ?" % key`
		12a457	`- if isinstance(value, int):`
		12a457	`- params.append(value)`
		12a457	`- else:`
		12a457	`- params.append(unicode(value, "utf8"))`
		12a457	`+ params.append(value)`
		12a457
		12a457	`self.cursor.execute(query, params)`
		12a457
		12a457	`@@ -230,12 +212,8 @@ class ChangelogData(object):`
		12a457	`params = []`
		12a457
		12a457	`for key, value in filters.items():`
		12a457	`- print value`
		12a457	`query += " AND %s = ?" % key`
		12a457	`- if isinstance(value, int):`
		12a457	`- params.append(value)`
		12a457	`- else:`
		12a457	`- params.append(unicode(value, "utf8"))`
		12a457	`+ params.append(value)`
		12a457
		12a457	`self.cursor.execute(query, params)`
		12a457	`row = self.cursor.fetchone()`
		12a457	`@@ -344,10 +322,15 @@ class ChangelogData(object):`
		12a457	`def when_create_mknod_mkdir(self, changelogfile, data):`
		12a457	`# E <GFID> <MKNOD\|CREATE\|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME>`
		12a457	`# Add the Entry to DB`
		12a457	`- pgfid1, bn1 = urllib.unquote_plus(data[6]).split("/", 1)`
		12a457	`+ # urllib.unquote_plus will not handle unicode so, encode Unicode to`
		12a457	`+ # represent in 8 bit format and then unquote`
		12a457	`+ pgfid1, bn1 = urllib.unquote_plus(`
		12a457	`+ data[6].encode("utf-8")).split("/", 1)`
		12a457
		12a457	`if self.args.no_encode:`
		12a457	`- bn1 = bn1.strip()`
		12a457	`+ # No urlencode since no_encode is set, so convert again to Unicode`
		12a457	`+ # format from previously encoded.`
		12a457	`+ bn1 = bn1.decode("utf-8").strip()`
		12a457	`else:`
		12a457	`# Quote again the basename`
		12a457	`bn1 = urllib.quote_plus(bn1.strip())`
		12a457	`@@ -356,13 +339,15 @@ class ChangelogData(object):`
		12a457
		12a457	`def when_rename(self, changelogfile, data):`
		12a457	`# E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME>`
		12a457	`- pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1)`
		12a457	`- pgfid2, bn2 = urllib.unquote_plus(data[4]).split("/", 1)`
		12a457	`+ pgfid1, bn1 = urllib.unquote_plus(`
		12a457	`+ data[3].encode("utf-8")).split("/", 1)`
		12a457	`+ pgfid2, bn2 = urllib.unquote_plus(`
		12a457	`+ data[4].encode("utf-8")).split("/", 1)`
		12a457
		12a457	`if self.args.no_encode:`
		12a457	`# Quote again the basename`
		12a457	`- bn1 = bn1.strip()`
		12a457	`- bn2 = bn2.strip()`
		12a457	`+ bn1 = bn1.decode("utf-8").strip()`
		12a457	`+ bn2 = bn2.decode("utf-8").strip()`
		12a457	`else:`
		12a457	`# Quote again the basename`
		12a457	`bn1 = urllib.quote_plus(bn1.strip())`
		12a457	`@@ -406,10 +391,11 @@ class ChangelogData(object):`
		12a457	`def when_link_symlink(self, changelogfile, data):`
		12a457	`# E <GFID> <LINK\|SYMLINK> <PGFID>/<BASENAME>`
		12a457	`# Add as New record in Db as Type NEW`
		12a457	`- pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1)`
		12a457	`+ pgfid1, bn1 = urllib.unquote_plus(`
		12a457	`+ data[3].encode("utf-8")).split("/", 1)`
		12a457	`if self.args.no_encode:`
		12a457	`# Quote again the basename`
		12a457	`- bn1 = bn1.strip()`
		12a457	`+ bn1 = bn1.decode("utf-8").strip()`
		12a457	`else:`
		12a457	`# Quote again the basename`
		12a457	`bn1 = urllib.quote_plus(bn1.strip())`
		12a457	`@@ -424,10 +410,11 @@ class ChangelogData(object):`
		12a457
		12a457	`def when_unlink_rmdir(self, changelogfile, data):`
		12a457	`# E <GFID> <UNLINK\|RMDIR> <PGFID>/<BASENAME>`
		12a457	`- pgfid1, bn1 = urllib.unquote_plus(data[3]).split("/", 1)`
		12a457	`+ pgfid1, bn1 = urllib.unquote_plus(`
		12a457	`+ data[3].encode("utf-8")).split("/", 1)`
		12a457
		12a457	`if self.args.no_encode:`
		12a457	`- bn1 = bn1.strip()`
		12a457	`+ bn1 = bn1.decode("utf-8").strip()`
		12a457	`else:`
		12a457	`# Quote again the basename`
		12a457	`bn1 = urllib.quote_plus(bn1.strip())`
		12a457	`diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py`
		12a457	`index 72f11a7..a87fac4 100644`
		12a457	`--- a/tools/glusterfind/src/main.py`
		12a457	`+++ b/tools/glusterfind/src/main.py`
		12a457	`@@ -24,6 +24,7 @@ from utils import setup_logger, human_time, handle_rm_error`
		12a457	`from utils import get_changelog_rollover_time, cache_output, create_file`
		12a457	`import conf`
		12a457	`from changelogdata import OutputMerger`
		12a457	`+import codecs`
		12a457
		12a457	`PROG_DESCRIPTION = """`
		12a457	`GlusterFS Incremental API`
		12a457	`@@ -394,7 +395,7 @@ def enable_volume_options(args):`
		12a457
		12a457
		12a457	`def write_output(args, outfilemerger):`
		12a457	`- with open(args.outfile, "a") as f:`
		12a457	`+ with codecs.open(args.outfile, "a", encoding="utf-8") as f:`
		12a457	`for row in outfilemerger.get():`
		12a457	`# Multiple paths in case of Hardlinks`
		12a457	`paths = row[1].split(",")`
		12a457	`@@ -409,12 +410,9 @@ def write_output(args, outfilemerger):`
		12a457	`if p_rep == row_2_rep:`
		12a457	`continue`
		12a457
		12a457	`- p_rep = p_rep.encode('utf8', 'replace')`
		12a457	`- row_2_rep = row_2_rep.encode('utf8', 'replace')`
		12a457	`-`
		12a457	`- f.write("{0} {1} {2}\n".format(row[0],`
		12a457	`- p_rep,`
		12a457	`- row_2_rep))`
		12a457	`+ f.write(u"{0} {1} {2}\n".format(row[0],`
		12a457	`+ p_rep,`
		12a457	`+ row_2_rep))`
		12a457
		12a457
		12a457	`def mode_create(session_dir, args):`
		12a457	`diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py`
		12a457	`index b3b0bdf..b05f08e 100644`
		12a457	`--- a/tools/glusterfind/src/utils.py`
		12a457	`+++ b/tools/glusterfind/src/utils.py`
		12a457	`@@ -242,4 +242,4 @@ def output_path_prepare(path, args):`
		12a457	`if args.no_encode:`
		12a457	`return path`
		12a457	`else:`
		12a457	`- return urllib.quote_plus(path)`
		12a457	`+ return urllib.quote_plus(path.encode("utf-8"))`
		12a457	`--`
		12a457	`1.7.1`
		12a457

rpms / glusterfs

Source Code

Blame SOURCES/0064-tools-glusterfind-Handling-Unicode-file-names.patch