Blame SOURCES/BZ-1391507-fix-filelist-queries-for-dup-pkgs.patch

5e9bef
diff -up yum-3.4.3/yum/sqlitesack.py.orig yum-3.4.3/yum/sqlitesack.py
5e9bef
--- yum-3.4.3/yum/sqlitesack.py.orig	2017-02-08 18:13:03.646086042 +0100
5e9bef
+++ yum-3.4.3/yum/sqlitesack.py	2017-02-08 18:13:16.270073910 +0100
5e9bef
@@ -173,6 +173,21 @@ def _excluder_match(excluder, match, reg
5e9bef
 
5e9bef
     return False
5e9bef
 
5e9bef
+def _deduplicate(cur, field):
5e9bef
+    """Eliminate duplicate rows from cursor based on field.
5e9bef
+
5e9bef
+    Assuming the result set can be divided into one or more equivalent groups
5e9bef
+    of rows based on the given field, this wrapper will yield rows from only
5e9bef
+    one of the groups, avoiding duplicates.
5e9bef
+    """
5e9bef
+    first_val = None
5e9bef
+    for ob in cur:
5e9bef
+        if first_val is None:
5e9bef
+            first_val = ob[field]
5e9bef
+        elif ob[field] != first_val:
5e9bef
+            continue
5e9bef
+        yield ob
5e9bef
+
5e9bef
 
5e9bef
 class YumAvailablePackageSqlite(YumAvailablePackage, PackageObject, RpmBase):
5e9bef
     def __init__(self, repo, db_obj):
5e9bef
@@ -283,6 +298,14 @@ class YumAvailablePackageSqlite(YumAvail
5e9bef
         setattr(self, varname, value)
5e9bef
             
5e9bef
         return value
5e9bef
+
5e9bef
+    # Note that we use pkgId instead of pkgKey to filter the files and
5e9bef
+    # changelog entries since we can't guarantee that pkgKeys in primarydb and
5e9bef
+    # filelistsdb are in sync (since self.pkgKey is obtained from primarydb).
5e9bef
+    #
5e9bef
+    # Also, because of that, we must make sure not to return duplicate entries
5e9bef
+    # in case we have some duplicate packages (i.e. same checksums), so we use
5e9bef
+    # _deduplicate().
5e9bef
         
5e9bef
     def _loadFiles(self):
5e9bef
         if self._loadedfiles:
5e9bef
@@ -293,10 +316,10 @@ class YumAvailablePackageSqlite(YumAvail
5e9bef
         #FIXME - this should be try, excepting
5e9bef
         self.sack.populate(self.repo, mdtype='filelists')
5e9bef
         cur = self._sql_MD('filelists',
5e9bef
-                           "SELECT dirname, filetypes, filenames " \
5e9bef
+                           "SELECT pkgKey, dirname, filetypes, filenames " \
5e9bef
                            "FROM   filelist JOIN packages USING(pkgKey) " \
5e9bef
                            "WHERE  packages.pkgId = ?", (self.pkgId,))
5e9bef
-        for ob in cur:
5e9bef
+        for ob in _deduplicate(cur, 'pkgKey'):
5e9bef
             dirname = ob['dirname']
5e9bef
             if dirname == '.':
5e9bef
                 dirname = ''
5e9bef
@@ -323,13 +346,13 @@ class YumAvailablePackageSqlite(YumAvail
5e9bef
                     self._changelog = result
5e9bef
                     return
5e9bef
             cur = self._sql_MD('other',
5e9bef
-                               "SELECT date, author, changelog " \
5e9bef
+                               "SELECT pkgKey, date, author, changelog " \
5e9bef
                                "FROM   changelog JOIN packages USING(pkgKey) " \
5e9bef
                                "WHERE  pkgId = ? ORDER BY date DESC",
5e9bef
                                (self.pkgId,))
5e9bef
             # Check count(pkgId) here, the same way we do in searchFiles()?
5e9bef
             # Failure mode is much less of a problem.
5e9bef
-            for ob in cur:
5e9bef
+            for ob in _deduplicate(cur, 'pkgKey'):
5e9bef
                 # Note: Atm. rpm only does days, where (60 * 60 * 24) == 86400
5e9bef
                 #       and we have the hack in _dump_changelog() to keep the
5e9bef
                 #       order the same, so this is a quick way to get rid of