Blob Blame History Raw
diff -up yum-3.4.3/yum/sqlitesack.py.orig yum-3.4.3/yum/sqlitesack.py
--- yum-3.4.3/yum/sqlitesack.py.orig	2017-02-08 18:13:03.646086042 +0100
+++ yum-3.4.3/yum/sqlitesack.py	2017-02-08 18:13:16.270073910 +0100
@@ -173,6 +173,21 @@ def _excluder_match(excluder, match, reg
 
     return False
 
+def _deduplicate(cur, field):
+    """Yield only the rows of cur that share the first row's field value.
+
+    Assuming the result set can be divided into one or more equivalent groups
+    of rows based on the given field, this yields rows from only one of the
+    groups (the one the first row belongs to), avoiding duplicates.
+    """
+    unset = first_val = object()  # sentinel: None is a valid field value
+    for ob in cur:
+        if first_val is unset:
+            first_val = ob[field]
+        elif ob[field] != first_val:
+            continue
+        yield ob
+
 
 class YumAvailablePackageSqlite(YumAvailablePackage, PackageObject, RpmBase):
     def __init__(self, repo, db_obj):
@@ -283,6 +298,14 @@ class YumAvailablePackageSqlite(YumAvail
         setattr(self, varname, value)
             
         return value
+
+    # Note that we use pkgId instead of pkgKey to filter the files and
+    # changelog entries, since self.pkgKey is obtained from primarydb and we
+    # can't guarantee that pkgKeys in filelistsdb and otherdb match it.
+    #
+    # Because of that, we must also make sure not to return duplicate entries
+    # in case we have some duplicate packages (i.e. same checksums), so we
+    # use _deduplicate().
         
     def _loadFiles(self):
         if self._loadedfiles:
@@ -293,10 +316,10 @@ class YumAvailablePackageSqlite(YumAvail
         #FIXME - this should be try, excepting
         self.sack.populate(self.repo, mdtype='filelists')
         cur = self._sql_MD('filelists',
-                           "SELECT dirname, filetypes, filenames " \
+                           "SELECT pkgKey, dirname, filetypes, filenames " \
                            "FROM   filelist JOIN packages USING(pkgKey) " \
                            "WHERE  packages.pkgId = ?", (self.pkgId,))
-        for ob in cur:
+        for ob in _deduplicate(cur, 'pkgKey'):
             dirname = ob['dirname']
             if dirname == '.':
                 dirname = ''
@@ -323,13 +346,13 @@ class YumAvailablePackageSqlite(YumAvail
                     self._changelog = result
                     return
             cur = self._sql_MD('other',
-                               "SELECT date, author, changelog " \
+                               "SELECT pkgKey, date, author, changelog " \
                                "FROM   changelog JOIN packages USING(pkgKey) " \
                                "WHERE  pkgId = ? ORDER BY date DESC",
                                (self.pkgId,))
             # Check count(pkgId) here, the same way we do in searchFiles()?
             # Failure mode is much less of a problem.
-            for ob in cur:
+            for ob in _deduplicate(cur, 'pkgKey'):
                 # Note: Atm. rpm only does days, where (60 * 60 * 24) == 86400
                 #       and we have the hack in _dump_changelog() to keep the
                 #       order the same, so this is a quick way to get rid of