Blame SOURCES/BZ-1391507-fix-filelist-queries-for-dup-pkgs.patch

d2a170
diff -up yum-3.4.3/yum/sqlitesack.py.orig yum-3.4.3/yum/sqlitesack.py
d2a170
--- yum-3.4.3/yum/sqlitesack.py.orig	2017-02-08 18:13:03.646086042 +0100
d2a170
+++ yum-3.4.3/yum/sqlitesack.py	2017-02-08 18:13:16.270073910 +0100
d2a170
@@ -173,6 +173,21 @@ def _excluder_match(excluder, match, reg
d2a170
 
d2a170
     return False
d2a170
 
d2a170
+def _deduplicate(cur, field):
d2a170
+    """Eliminate duplicate rows from cursor based on field.
d2a170
+
d2a170
+    Assuming the result set can be divided into one or more equivalent groups
d2a170
+    of rows based on the given field, this wrapper will yield rows from only
d2a170
+    one of the groups (the group of the first row seen), avoiding duplicates.
d2a170
+    """
d2a170
+    first_val = None
d2a170
+    for ob in cur:
d2a170
+        if first_val is None:
d2a170
+            first_val = ob[field]
d2a170
+        elif ob[field] != first_val:
d2a170
+            continue
d2a170
+        yield ob
d2a170
+
d2a170
 
d2a170
 class YumAvailablePackageSqlite(YumAvailablePackage, PackageObject, RpmBase):
d2a170
     def __init__(self, repo, db_obj):
d2a170
@@ -283,6 +298,14 @@ class YumAvailablePackageSqlite(YumAvail
d2a170
         setattr(self, varname, value)
d2a170
             
d2a170
         return value
d2a170
+
d2a170
+    # Note that we use pkgId instead of pkgKey to filter the files and
d2a170
+    # changelog entries since we can't guarantee that pkgKeys in primarydb and
d2a170
+    # filelistsdb/otherdb are in sync (self.pkgKey is obtained from primarydb).
d2a170
+    #
d2a170
+    # Also, because of that, we must make sure not to return duplicate entries
d2a170
+    # in case we have some duplicate packages (i.e. same checksums), so we use
d2a170
+    # _deduplicate().
d2a170
         
d2a170
     def _loadFiles(self):
d2a170
         if self._loadedfiles:
d2a170
@@ -293,10 +316,10 @@ class YumAvailablePackageSqlite(YumAvail
d2a170
         #FIXME - this should be try, excepting
d2a170
         self.sack.populate(self.repo, mdtype='filelists')
d2a170
         cur = self._sql_MD('filelists',
d2a170
-                           "SELECT dirname, filetypes, filenames " \
d2a170
+                           "SELECT pkgKey, dirname, filetypes, filenames " \
d2a170
                            "FROM   filelist JOIN packages USING(pkgKey) " \
d2a170
                            "WHERE  packages.pkgId = ?", (self.pkgId,))
d2a170
-        for ob in cur:
d2a170
+        for ob in _deduplicate(cur, 'pkgKey'):
d2a170
             dirname = ob['dirname']
d2a170
             if dirname == '.':
d2a170
                 dirname = ''
d2a170
@@ -323,13 +346,13 @@ class YumAvailablePackageSqlite(YumAvail
d2a170
                     self._changelog = result
d2a170
                     return
d2a170
             cur = self._sql_MD('other',
d2a170
-                               "SELECT date, author, changelog " \
d2a170
+                               "SELECT pkgKey, date, author, changelog " \
d2a170
                                "FROM   changelog JOIN packages USING(pkgKey) " \
d2a170
                                "WHERE  pkgId = ? ORDER BY date DESC",
d2a170
                                (self.pkgId,))
d2a170
             # Check count(pkgId) here, the same way we do in searchFiles()?
d2a170
             # Failure mode is much less of a problem.
d2a170
-            for ob in cur:
d2a170
+            for ob in _deduplicate(cur, 'pkgKey'):
d2a170
                 # Note: Atm. rpm only does days, where (60 * 60 * 24) == 86400
d2a170
                 #       and we have the hack in _dump_changelog() to keep the
d2a170
                 #       order the same, so this is a quick way to get rid of