Blame SOURCES/0007-reposync-Add-latest-NEVRAs-per-stream-to-download-RhBug-1833074.patch

6c74fc
From 37d60b626fcb3e3f68b02c2c24e4ae5149cf223f Mon Sep 17 00:00:00 2001
6c74fc
From: Marek Blaha <mblaha@redhat.com>
6c74fc
Date: Thu, 23 Jul 2020 16:27:22 +0200
6c74fc
Subject: [PATCH] [reposync] Add latest NEVRAs per stream to download (RhBug:
6c74fc
 1833074)
6c74fc
6c74fc
This covers the situation when a package with the newest NEVRA is part of
6c74fc
an older version of a stream and reposync was used with --newest-only
6c74fc
switch.
6c74fc
With this patch these package versions are going to be downloaded:
6c74fc
- the latest NEVRAs from non-modular packages
6c74fc
- all packages from stream version with the latest package NEVRA (in
6c74fc
  case the latest NEVRA is part of multiple stream versions only the
6c74fc
  highest is downloaded)
6c74fc
- all packages from the latest stream version
6c74fc
6c74fc
https://bugzilla.redhat.com/show_bug.cgi?id=1833074
6c74fc
---
6c74fc
 plugins/reposync.py | 53 ++++++++++++++++++++++++++++++++++++---------
6c74fc
 1 file changed, 43 insertions(+), 10 deletions(-)
6c74fc
6c74fc
diff --git a/plugins/reposync.py b/plugins/reposync.py
6c74fc
index 548a05b4..7556e7eb 100644
6c74fc
--- a/plugins/reposync.py
6c74fc
+++ b/plugins/reposync.py
6c74fc
@@ -207,27 +207,60 @@ def download_metadata(self, repo):
6c74fc
 
6c74fc
     def _get_latest(self, query):
6c74fc
         """
6c74fc
-        return query with latest nonmodular package and all packages from latest version per stream
6c74fc
+        return union of these queries:
6c74fc
+        - the latest NEVRAs from non-modular packages
6c74fc
+        - all packages from stream version with the latest package NEVRA
6c74fc
+          (this should not be needed but the latest package NEVRAs might be
6c74fc
+          part of an older module version)
6c74fc
+        - all packages from the latest stream version
6c74fc
         """
6c74fc
         if not dnf.base.WITH_MODULES:
6c74fc
             return query.latest()
6c74fc
+
6c74fc
         query.apply()
6c74fc
         module_packages = self.base._moduleContainer.getModulePackages()
6c74fc
         all_artifacts = set()
6c74fc
         module_dict = {}  # {NameStream: {Version: [modules]}}
6c74fc
+        artifact_version = {} # {artifact: {NameStream: [Version]}}
6c74fc
         for module_package in module_packages:
6c74fc
-            all_artifacts.update(module_package.getArtifacts())
6c74fc
+            artifacts = module_package.getArtifacts()
6c74fc
+            all_artifacts.update(artifacts)
6c74fc
             module_dict.setdefault(module_package.getNameStream(), {}).setdefault(
6c74fc
                 module_package.getVersionNum(), []).append(module_package)
6c74fc
-        non_modular_latest = query.filter(
6c74fc
+            for artifact in artifacts:
6c74fc
+                artifact_version.setdefault(artifact, {}).setdefault(
6c74fc
+                    module_package.getNameStream(), []).append(module_package.getVersionNum())
6c74fc
+
6c74fc
+        # the latest NEVRAs from non-modular packages
6c74fc
+        latest_query = query.filter(
6c74fc
             pkg__neq=query.filter(nevra_strict=all_artifacts)).latest()
6c74fc
-        latest_artifacts = set()
6c74fc
-        for version_dict in module_dict.values():
6c74fc
-            keys = sorted(version_dict.keys(), reverse=True)
6c74fc
-            for module in version_dict[keys[0]]:
6c74fc
-                latest_artifacts.update(module.getArtifacts())
6c74fc
-        latest_modular_query = query.filter(nevra_strict=latest_artifacts)
6c74fc
-        return latest_modular_query.union(non_modular_latest)
6c74fc
+
6c74fc
+        # artifacts from the newest version and those versions that contain an artifact
6c74fc
+        # with the highest NEVRA
6c74fc
+        latest_stream_artifacts = set()
6c74fc
+        for namestream, version_dict in module_dict.items():
6c74fc
+            # versions that will be synchronized
6c74fc
+            versions = set()
6c74fc
+            # add the newest stream version
6c74fc
+            versions.add(sorted(version_dict.keys(), reverse=True)[0])
6c74fc
+            # collect all artifacts in all stream versions
6c74fc
+            stream_artifacts = set()
6c74fc
+            for modules in version_dict.values():
6c74fc
+                for module in modules:
6c74fc
+                    stream_artifacts.update(module.getArtifacts())
6c74fc
+            # find versions to which the packages with the highest NEVRAs belong
6c74fc
+            for latest_pkg in query.filter(nevra_strict=stream_artifacts).latest():
6c74fc
+                # here we depend on modules.yaml always containing full NEVRA (including epoch)
6c74fc
+                nevra = "{0.name}-{0.epoch}:{0.version}-{0.release}.{0.arch}".format(latest_pkg)
6c74fc
+                # download only highest version containing the latest artifact
6c74fc
+                versions.add(max(artifact_version[nevra][namestream]))
6c74fc
+            # add all artifacts from selected versions for synchronization
6c74fc
+            for version in versions:
6c74fc
+                for module in version_dict[version]:
6c74fc
+                    latest_stream_artifacts.update(module.getArtifacts())
6c74fc
+        latest_query = latest_query.union(query.filter(nevra_strict=latest_stream_artifacts))
6c74fc
+
6c74fc
+        return latest_query
6c74fc
 
6c74fc
     def get_pkglist(self, repo):
6c74fc
         query = self.base.sack.query(flags=hawkey.IGNORE_MODULAR_EXCLUDES).available().filterm(