From 054cd9db81af13b5cdc669067775ff71aa82625f Mon Sep 17 00:00:00 2001 From: Matthew Almond Date: Tue, 9 Mar 2021 11:01:40 -0800 Subject: [PATCH 1/2] Use libdnf.utils.checksum_{check,value} libdnf has the canonical implementation of checksum handling. We aim to replace all use of dnf.yum.misc.checksum() with this. In doing so, this fixes installing previously downloaded and transcoded rpms to support https://github.com/rpm-software-management/librepo/pull/222 This also has some minor performance benefits: librepo's checksum handling employs caching of previously downloaded files via extended attributes. This works for ordinary rpms in the dnf cache, but does not work (yet) for rpm paths specified on the command line due to https://github.com/rpm-software-management/librepo/issues/233. That issue is pretty minor, and the fix ends up in libdnf later. The previous implementation mapped all runtime errors to MiscError. We still do this by taking the libdnf.error.Error class (defined in SWIG) and mapping it directly back to the Python exception as before. 
--- dnf/package.py | 18 ++++---- dnf/yum/misc.py | 113 ------------------------------------------------ 2 files changed, 10 insertions(+), 121 deletions(-) diff --git a/dnf/package.py b/dnf/package.py index b01e555eba..fc89cf98a8 100644 --- a/dnf/package.py +++ b/dnf/package.py @@ -30,6 +30,8 @@ import dnf.rpm import dnf.yum.misc import hawkey +import libdnf.error +import libdnf.utils import logging import os import rpm @@ -56,7 +58,10 @@ def _chksum(self): return self._priv_chksum if self._from_cmdline: chksum_type = dnf.yum.misc.get_default_chksum_type() - chksum_val = dnf.yum.misc.checksum(chksum_type, self.location) + try: + chksum_val = libdnf.utils.checksum_value(chksum_type, self.location) + except libdnf.error.Error as e: + raise dnf.exceptions.MiscError(str(e)) return (hawkey.chksum_type(chksum_type), binascii.unhexlify(chksum_val)) return super(Package, self).chksum @@ -330,10 +335,7 @@ def verifyLocalPkg(self): if self._from_cmdline: return True # local package always verifies against itself (chksum_type, chksum) = self.returnIdSum() - real_sum = dnf.yum.misc.checksum(chksum_type, self.localPkg(), - datasize=self._size) - if real_sum != chksum: - logger.debug(_('%s: %s check failed: %s vs %s'), - self, chksum_type, real_sum, chksum) - return False - return True + try: + return libdnf.utils.checksum_check(chksum_type, self.localPkg(), chksum) + except libdnf.error.Error as e: + raise dnf.exceptions.MiscError(str(e)) diff --git a/dnf/yum/misc.py b/dnf/yum/misc.py index 3e3905feb8..af018a8a1b 100644 --- a/dnf/yum/misc.py +++ b/dnf/yum/misc.py @@ -22,7 +22,6 @@ from __future__ import print_function, absolute_import from __future__ import unicode_literals -from dnf.exceptions import MiscError from dnf.pycomp import base64_decodebytes, basestring, unicode from stat import * import libdnf.utils @@ -32,7 +31,6 @@ import dnf.i18n import errno import glob -import hashlib import io import os import os.path @@ -41,7 +39,6 @@ import shutil import tempfile 
-_available_checksums = set(['md5', 'sha1', 'sha256', 'sha384', 'sha512']) _default_checksums = ['sha256'] @@ -68,119 +65,9 @@ def re_full_search_needed(s): return True return False - -class Checksums(object): - """ Generate checksum(s), on given pieces of data. Producing the - Length and the result(s) when complete. """ - - def __init__(self, checksums=None, ignore_missing=False, ignore_none=False): - if checksums is None: - checksums = _default_checksums - self._sumalgos = [] - self._sumtypes = [] - self._len = 0 - - done = set() - for sumtype in checksums: - if sumtype == 'sha': - sumtype = 'sha1' - if sumtype in done: - continue - - if sumtype in _available_checksums: - sumalgo = hashlib.new(sumtype) - elif ignore_missing: - continue - else: - raise MiscError('Error Checksumming, bad checksum type %s' % - sumtype) - done.add(sumtype) - self._sumtypes.append(sumtype) - self._sumalgos.append(sumalgo) - if not done and not ignore_none: - raise MiscError('Error Checksumming, no valid checksum type') - - def __len__(self): - return self._len - - # Note that len(x) is assert limited to INT_MAX, which is 2GB on i686. 
- length = property(fget=lambda self: self._len) - - def update(self, data): - self._len += len(data) - for sumalgo in self._sumalgos: - data = data.encode('utf-8') if isinstance(data, unicode) else data - sumalgo.update(data) - - def read(self, fo, size=2**16): - data = fo.read(size) - self.update(data) - return data - - def hexdigests(self): - ret = {} - for sumtype, sumdata in zip(self._sumtypes, self._sumalgos): - ret[sumtype] = sumdata.hexdigest() - return ret - - def hexdigest(self, checksum=None): - if checksum is None: - if not self._sumtypes: - return None - checksum = self._sumtypes[0] - if checksum == 'sha': - checksum = 'sha1' - return self.hexdigests()[checksum] - - def digests(self): - ret = {} - for sumtype, sumdata in zip(self._sumtypes, self._sumalgos): - ret[sumtype] = sumdata.digest() - return ret - - def digest(self, checksum=None): - if checksum is None: - if not self._sumtypes: - return None - checksum = self._sumtypes[0] - if checksum == 'sha': - checksum = 'sha1' - return self.digests()[checksum] - def get_default_chksum_type(): return _default_checksums[0] -def checksum(sumtype, file, CHUNK=2**16, datasize=None): - """takes filename, hand back Checksum of it - sumtype = md5 or sha/sha1/sha256/sha512 (note sha == sha1) - filename = /path/to/file - CHUNK=65536 by default""" - - # chunking brazenly lifted from Ryan Tomayko - - if isinstance(file, basestring): - try: - with open(file, 'rb', CHUNK) as fo: - return checksum(sumtype, fo, CHUNK, datasize) - except (IOError, OSError): - raise MiscError('Error opening file for checksum: %s' % file) - - try: - # assumes file is a file-like-object - data = Checksums([sumtype]) - while data.read(file, CHUNK): - if datasize is not None and data.length > datasize: - break - - # This screws up the length, but that shouldn't matter. We only care - # if this checksum == what we expect. 
- if datasize is not None and datasize != data.length: - return '!%u!%s' % (datasize, data.hexdigest(sumtype)) - - return data.hexdigest(sumtype) - except (IOError, OSError) as e: - raise MiscError('Error reading file for checksum: %s' % file) - class GenericHolder(object): """Generic Holder class used to hold other objects of known types It exists purely to be able to do object.somestuff, object.someotherstuff