Blob Blame History Raw
From 054cd9db81af13b5cdc669067775ff71aa82625f Mon Sep 17 00:00:00 2001
From: Matthew Almond <malmond@fb.com>
Date: Tue, 9 Mar 2021 11:01:40 -0800
Subject: [PATCH 1/2] Use libdnf.utils.checksum_{check,value}

libdnf has the canonical implementation of checksum handling. We aim to
replace all uses of dnf.yum.misc.checksum() with it. Doing so also
fixes installing previously downloaded and transcoded rpms, to support

https://github.com/rpm-software-management/librepo/pull/222

This also has some minor performance benefits: librepo's checksum
handling employs caching of previously downloaded files via extended
attributes. This works for ordinary rpms in the dnf cache, but does not
work (yet) for rpm paths specified on the command line due to
https://github.com/rpm-software-management/librepo/issues/233. That
issue is pretty minor, and the fix ends up in libdnf later.

The previous implementation mapped all runtime errors to MiscError. We
still do this by catching the libdnf.error.Error class (defined in
SWIG) and mapping it directly back to the Python exception, as before.
---
 dnf/package.py  |  18 ++++----
 dnf/yum/misc.py | 113 ------------------------------------------------
 2 files changed, 10 insertions(+), 121 deletions(-)

diff --git a/dnf/package.py b/dnf/package.py
index b01e555eba..fc89cf98a8 100644
--- a/dnf/package.py
+++ b/dnf/package.py
@@ -30,6 +30,8 @@
 import dnf.rpm
 import dnf.yum.misc
 import hawkey
+import libdnf.error
+import libdnf.utils
 import logging
 import os
 import rpm
@@ -56,7 +58,10 @@ def _chksum(self):
             return self._priv_chksum
         if self._from_cmdline:
             chksum_type = dnf.yum.misc.get_default_chksum_type()
-            chksum_val = dnf.yum.misc.checksum(chksum_type, self.location)
+            try:
+                chksum_val = libdnf.utils.checksum_value(chksum_type, self.location)
+            except libdnf.error.Error as e:
+                raise dnf.exceptions.MiscError(str(e))
             return (hawkey.chksum_type(chksum_type),
                     binascii.unhexlify(chksum_val))
         return super(Package, self).chksum
@@ -330,10 +335,7 @@ def verifyLocalPkg(self):
         if self._from_cmdline:
             return True # local package always verifies against itself
         (chksum_type, chksum) = self.returnIdSum()
-        real_sum = dnf.yum.misc.checksum(chksum_type, self.localPkg(),
-                                         datasize=self._size)
-        if real_sum != chksum:
-            logger.debug(_('%s: %s check failed: %s vs %s'),
-                         self, chksum_type, real_sum, chksum)
-            return False
-        return True
+        try:
+            return libdnf.utils.checksum_check(chksum_type, self.localPkg(), chksum)
+        except libdnf.error.Error as e:
+            raise dnf.exceptions.MiscError(str(e))
diff --git a/dnf/yum/misc.py b/dnf/yum/misc.py
index 3e3905feb8..af018a8a1b 100644
--- a/dnf/yum/misc.py
+++ b/dnf/yum/misc.py
@@ -22,7 +22,6 @@
 
 from __future__ import print_function, absolute_import
 from __future__ import unicode_literals
-from dnf.exceptions import MiscError
 from dnf.pycomp import base64_decodebytes, basestring, unicode
 from stat import *
 import libdnf.utils
@@ -32,7 +31,6 @@
 import dnf.i18n
 import errno
 import glob
-import hashlib
 import io
 import os
 import os.path
@@ -41,7 +39,6 @@
 import shutil
 import tempfile
 
-_available_checksums = set(['md5', 'sha1', 'sha256', 'sha384', 'sha512'])
 _default_checksums = ['sha256']
 
 
@@ -68,119 +65,9 @@ def re_full_search_needed(s):
             return True
     return False
 
-
-class Checksums(object):
-    """ Generate checksum(s), on given pieces of data. Producing the
-        Length and the result(s) when complete. """
-
-    def __init__(self, checksums=None, ignore_missing=False, ignore_none=False):
-        if checksums is None:
-            checksums = _default_checksums
-        self._sumalgos = []
-        self._sumtypes = []
-        self._len = 0
-
-        done = set()
-        for sumtype in checksums:
-            if sumtype == 'sha':
-                sumtype = 'sha1'
-            if sumtype in done:
-                continue
-
-            if sumtype in _available_checksums:
-                sumalgo = hashlib.new(sumtype)
-            elif ignore_missing:
-                continue
-            else:
-                raise MiscError('Error Checksumming, bad checksum type %s' %
-                                sumtype)
-            done.add(sumtype)
-            self._sumtypes.append(sumtype)
-            self._sumalgos.append(sumalgo)
-        if not done and not ignore_none:
-            raise MiscError('Error Checksumming, no valid checksum type')
-
-    def __len__(self):
-        return self._len
-
-    # Note that len(x) is assert limited to INT_MAX, which is 2GB on i686.
-    length = property(fget=lambda self: self._len)
-
-    def update(self, data):
-        self._len += len(data)
-        for sumalgo in self._sumalgos:
-            data = data.encode('utf-8') if isinstance(data, unicode) else data
-            sumalgo.update(data)
-
-    def read(self, fo, size=2**16):
-        data = fo.read(size)
-        self.update(data)
-        return data
-
-    def hexdigests(self):
-        ret = {}
-        for sumtype, sumdata in zip(self._sumtypes, self._sumalgos):
-            ret[sumtype] = sumdata.hexdigest()
-        return ret
-
-    def hexdigest(self, checksum=None):
-        if checksum is None:
-            if not self._sumtypes:
-                return None
-            checksum = self._sumtypes[0]
-        if checksum == 'sha':
-            checksum = 'sha1'
-        return self.hexdigests()[checksum]
-
-    def digests(self):
-        ret = {}
-        for sumtype, sumdata in zip(self._sumtypes, self._sumalgos):
-            ret[sumtype] = sumdata.digest()
-        return ret
-
-    def digest(self, checksum=None):
-        if checksum is None:
-            if not self._sumtypes:
-                return None
-            checksum = self._sumtypes[0]
-        if checksum == 'sha':
-            checksum = 'sha1'
-        return self.digests()[checksum]
-
 def get_default_chksum_type():
     return _default_checksums[0]
 
-def checksum(sumtype, file, CHUNK=2**16, datasize=None):
-    """takes filename, hand back Checksum of it
-       sumtype = md5 or sha/sha1/sha256/sha512 (note sha == sha1)
-       filename = /path/to/file
-       CHUNK=65536 by default"""
-
-    # chunking brazenly lifted from Ryan Tomayko
-
-    if isinstance(file, basestring):
-        try:
-            with open(file, 'rb', CHUNK) as fo:
-                return checksum(sumtype, fo, CHUNK, datasize)
-        except (IOError, OSError):
-            raise MiscError('Error opening file for checksum: %s' % file)
-
-    try:
-        # assumes file is a file-like-object
-        data = Checksums([sumtype])
-        while data.read(file, CHUNK):
-            if datasize is not None and data.length > datasize:
-                break
-
-        # This screws up the length, but that shouldn't matter. We only care
-        # if this checksum == what we expect.
-        if datasize is not None and datasize != data.length:
-            return '!%u!%s' % (datasize, data.hexdigest(sumtype))
-
-        return data.hexdigest(sumtype)
-    except (IOError, OSError) as e:
-        raise MiscError('Error reading file for checksum: %s' % file)
-
 class GenericHolder(object):
     """Generic Holder class used to hold other objects of known types
        It exists purely to be able to do object.somestuff, object.someotherstuff