Blame SOURCES/BZ-1391507-fix-filelist-queries-for-dup-pkgs.patch

07a10e
diff -up yum-3.4.3/yum/sqlitesack.py.orig yum-3.4.3/yum/sqlitesack.py
07a10e
--- yum-3.4.3/yum/sqlitesack.py.orig	2017-02-08 18:13:03.646086042 +0100
07a10e
+++ yum-3.4.3/yum/sqlitesack.py	2017-02-08 18:13:16.270073910 +0100
07a10e
@@ -173,6 +173,21 @@ def _excluder_match(excluder, match, reg
07a10e
 
07a10e
     return False
07a10e
 
07a10e
+def _deduplicate(cur, field):
07a10e
+    """Yield only the rows of cur whose field matches the first row's.
07a10e
+
07a10e
+    cur is an iterable of rows indexable by field.  The rows are assumed to
07a10e
+    form one or more equivalent groups keyed by field (possibly interleaved,
07a10e
+    possibly keyed by None); only the first-seen group is yielded.
07a10e
+    """
07a10e
+    first_val = unset = object()  # sentinel: None can be a real field value
07a10e
+    for ob in cur:
07a10e
+        if first_val is unset:
07a10e
+            first_val = ob[field]
07a10e
+        elif ob[field] != first_val:
07a10e
+            continue
07a10e
+        yield ob
07a10e
+
07a10e
 
07a10e
 class YumAvailablePackageSqlite(YumAvailablePackage, PackageObject, RpmBase):
07a10e
     def __init__(self, repo, db_obj):
07a10e
@@ -283,6 +298,14 @@ class YumAvailablePackageSqlite(YumAvail
07a10e
         setattr(self, varname, value)
07a10e
             
07a10e
         return value
07a10e
+
07a10e
+    # Note that we use pkgId instead of pkgKey to filter the files and
07a10e
+    # changelog entries since we can't guarantee that pkgKeys in primarydb are
07a10e
+    # in sync with filelistsdb/otherdb (self.pkgKey comes from primarydb).
07a10e
+    #
07a10e
+    # Also, because of that, we must make sure not to return duplicate entries
07a10e
+    # in case we have some duplicate packages (i.e. same checksums), so we use
07a10e
+    # _deduplicate().
07a10e
         
07a10e
     def _loadFiles(self):
07a10e
         if self._loadedfiles:
07a10e
@@ -293,10 +316,10 @@ class YumAvailablePackageSqlite(YumAvail
07a10e
         #FIXME - this should be try, excepting
07a10e
         self.sack.populate(self.repo, mdtype='filelists')
07a10e
         cur = self._sql_MD('filelists',
07a10e
-                           "SELECT dirname, filetypes, filenames " \
07a10e
+                           "SELECT pkgKey, dirname, filetypes, filenames " \
07a10e
                            "FROM   filelist JOIN packages USING(pkgKey) " \
07a10e
                            "WHERE  packages.pkgId = ?", (self.pkgId,))
07a10e
-        for ob in cur:
07a10e
+        for ob in _deduplicate(cur, 'pkgKey'):
07a10e
             dirname = ob['dirname']
07a10e
             if dirname == '.':
07a10e
                 dirname = ''
07a10e
@@ -323,13 +346,13 @@ class YumAvailablePackageSqlite(YumAvail
07a10e
                     self._changelog = result
07a10e
                     return
07a10e
             cur = self._sql_MD('other',
07a10e
-                               "SELECT date, author, changelog " \
07a10e
+                               "SELECT pkgKey, date, author, changelog " \
07a10e
                                "FROM   changelog JOIN packages USING(pkgKey) " \
07a10e
                                "WHERE  pkgId = ? ORDER BY date DESC",
07a10e
                                (self.pkgId,))
07a10e
             # Check count(pkgId) here, the same way we do in searchFiles()?
07a10e
             # Failure mode is much less of a problem.
07a10e
-            for ob in cur:
07a10e
+            for ob in _deduplicate(cur, 'pkgKey'):
07a10e
                 # Note: Atm. rpm only does days, where (60 * 60 * 24) == 86400
07a10e
                 #       and we have the hack in _dump_changelog() to keep the
07a10e
                 #       order the same, so this is a quick way to get rid of