Blob Blame History Raw
From 37d60b626fcb3e3f68b02c2c24e4ae5149cf223f Mon Sep 17 00:00:00 2001
From: Marek Blaha <mblaha@redhat.com>
Date: Thu, 23 Jul 2020 16:27:22 +0200
Subject: [PATCH] [reposync] Add latest NEVRAs per stream to download (RhBug:
 1833074)

This covers the situation when a package with the newest NEVRA is part
of an older version of a stream and reposync was used with the
--newest-only switch.
With this patch these package versions are going to be downloaded:
- the latest NEVRAs from non-modular packages
- all packages from the stream version with the latest package NEVRA (in
  case the latest NEVRA is part of multiple stream versions, only the
  highest one is downloaded)
- all packages from the latest stream version

https://bugzilla.redhat.com/show_bug.cgi?id=1833074
---
 plugins/reposync.py | 53 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/plugins/reposync.py b/plugins/reposync.py
index 548a05b4..7556e7eb 100644
--- a/plugins/reposync.py
+++ b/plugins/reposync.py
@@ -207,27 +207,60 @@ def download_metadata(self, repo):
 
     def _get_latest(self, query):
         """
-        return query with latest nonmodular package and all packages from latest version per stream
+        return union of these queries:
+        - the latest NEVRAs from non-modular packages
+        - all packages from stream version with the latest package NEVRA
+          (this should not be needed but the latest package NEVRAs might be
+          part of an older module version)
+        - all packages from the latest stream version
         """
         if not dnf.base.WITH_MODULES:
             return query.latest()
+
         query.apply()
         module_packages = self.base._moduleContainer.getModulePackages()
         all_artifacts = set()
         module_dict = {}  # {NameStream: {Version: [modules]}}
+        artifact_version = {} # {artifact: {NameStream: [Version]}}
         for module_package in module_packages:
-            all_artifacts.update(module_package.getArtifacts())
+            artifacts = module_package.getArtifacts()
+            all_artifacts.update(artifacts)
             module_dict.setdefault(module_package.getNameStream(), {}).setdefault(
                 module_package.getVersionNum(), []).append(module_package)
-        non_modular_latest = query.filter(
+            for artifact in artifacts:
+                artifact_version.setdefault(artifact, {}).setdefault(
+                    module_package.getNameStream(), []).append(module_package.getVersionNum())
+
+        # the latest NEVRAs from non-modular packages
+        latest_query = query.filter(
             pkg__neq=query.filter(nevra_strict=all_artifacts)).latest()
-        latest_artifacts = set()
-        for version_dict in module_dict.values():
-            keys = sorted(version_dict.keys(), reverse=True)
-            for module in version_dict[keys[0]]:
-                latest_artifacts.update(module.getArtifacts())
-        latest_modular_query = query.filter(nevra_strict=latest_artifacts)
-        return latest_modular_query.union(non_modular_latest)
+
+        # artifacts from the newest version and those versions that contain an artifact
+        # with the highest NEVRA
+        latest_stream_artifacts = set()
+        for namestream, version_dict in module_dict.items():
+            # versions that will be synchronized
+            versions = set()
+            # add the newest stream version
+            versions.add(sorted(version_dict.keys(), reverse=True)[0])
+            # collect all artifacts in all stream versions
+            stream_artifacts = set()
+            for modules in version_dict.values():
+                for module in modules:
+                    stream_artifacts.update(module.getArtifacts())
+            # find versions to which the packages with the highest NEVRAs belong
+            for latest_pkg in query.filter(nevra_strict=stream_artifacts).latest():
+                # here we depend on modules.yaml always containing full NEVRA (including epoch)
+                nevra = "{0.name}-{0.epoch}:{0.version}-{0.release}.{0.arch}".format(latest_pkg)
+                # download only highest version containing the latest artifact
+                versions.add(max(artifact_version[nevra][namestream]))
+            # add all artifacts from selected versions for synchronization
+            for version in versions:
+                for module in version_dict[version]:
+                    latest_stream_artifacts.update(module.getArtifacts())
+        latest_query = latest_query.union(query.filter(nevra_strict=latest_stream_artifacts))
+
+        return latest_query
 
     def get_pkglist(self, repo):
         query = self.base.sack.query(flags=hawkey.IGNORE_MODULAR_EXCLUDES).available().filterm(