From 014e3890be2e705d62e8f147db3f33694e4c8f73 Mon Sep 17 00:00:00 2001 From: Pierre-Yves Chibon Date: Mar 03 2022 08:52:58 +0000 Subject: [PATCH 1/3] Add support for a different lookaside structure There is a desire to offer SIGs the possibility of using a lookaside structure similar to the one used by CentOS Stream and Fedora, which is tied not to the name of the archive but to its hash. The idea, though, is to offer the new structure as opt-in and thus keep the old structure working. We therefore need to adjust the get_sources.sh script to support both structures. This commit does so: it allows both exploded-srpm and flat dist-git structures to use either the old or the new lookaside cache structure. The way this is achieved is simply to first call the URL corresponding to the old lookaside structure. If that call returns an HTTP code in the 200 range, then the script stops trying further URLs for that file; otherwise, the script will call the URL corresponding to the new lookaside structure. This commit also makes the different curl calls consistent and adds --retry 5 to all of them (retries do not apply to 404 replies, so using the new structure will not result in 6 requests to the old one before moving on, but just 1). 
Signed-off-by: Pierre-Yves Chibon --- diff --git a/get_sources.sh b/get_sources.sh index 890cc35..3ed0466 100755 --- a/get_sources.sh +++ b/get_sources.sh @@ -77,7 +77,7 @@ while [[ 0 -eq 0 ]]; do shift ;; -b ) - # Check this particular branch + # Check this particular branch BRANCH=$2 shift shift @@ -173,9 +173,19 @@ if [[ -s sources ]]; then for br in "${branches[@]}" do br=$(echo ${br}| sed -e s'|remotes/origin/||') + # Try the branch-specific lookaside structure url="${SURL}/$pkgname/${br}/$hash" echo "Retrieving ${url}" - curl -L ${QUIET} -f "${url}" -o "$tarball" && break + HTTP_CODE=$(curl -L ${QUIET} -H Pragma: -o "./$tarball" -R -S --fail --retry 5 "${url}" --write-out "%{http_code}" || true) + echo "Returned ${HTTP_CODE}" + if [[ ${HTTP_CODE} -gt 199 && ${HTTP_CODE} -lt 300 ]] ; then + echo "bailing" + break + fi + # Try the hash-specific lookaside structure + url="${SURL}/$pkgname/$tarball/$hashtype/$hash/$tarball" + echo "Retrieving ${url}" + curl -L ${QUIET} -H Pragma: -o "./$tarball" -R -S --fail --retry 5 "${url}" && break done else echo "$filename exists. skipping" @@ -231,21 +241,22 @@ else # zero byte file touch ${fname} else + hashType=$(weakHashDetection ${fsha}) + if [ "${hashType}" == "unknown" ]; then + echo 'Failure: Hash type unknown.' >&2 + exit 1; + fi + hashName=$(echo ${hashType}| sed -e s'|sum||') + if [ ${CHECK} -eq 1 ]; then - hashType=$(weakHashDetection ${fsha}) - if [ "${hashType}" == "unknown" ]; then - echo 'Failure: Hash type unknown.' >&2 + which ${hashType} >/dev/null 2>&1 + if [[ $? -ne 0 ]]; then + echo "Failure: You need ${hashType} in PATH." >&2 exit 1; - else - which ${hashType} >/dev/null 2>&1 - if [[ $? -ne 0 ]]; then - echo "Failure: You need ${hashType} in PATH." 
>&2 - exit 1; - fi fi fi if [ -e ${fname} -a ${CHECK} -eq 1 ]; then - # check hash sum and force download if wrong + # check hash sum and force download if wrong downsum=$(${hashType} ${fname} | awk '{print $1}') if [ "${fsha}" != "${downsum}" ]; then rm -f ${fname} @@ -255,9 +266,19 @@ else for br in "${branches[@]}" do br=$(echo ${br}| sed -e s'|remotes/origin/||') + # Try the branch-specific lookaside structure url="${SURL}/${pn}/${br}/${fsha}" echo "Retrieving ${url}" - curl -L ${QUIET} -f "${url}" -o "${fname}" && break + HTTP_CODE=$(curl -L ${QUIET} -H Pragma: -o "${fname}" -R -S --fail --retry 5 "${url}" --write-out "%{http_code}" || true) + echo "Returned ${HTTP_CODE}" + if [[ ${HTTP_CODE} -gt 199 && ${HTTP_CODE} -lt 300 ]] ; then + echo "bailing" + break + fi + # Try the hash-specific lookaside structure + url="${SURL}/$pn/$fname/${hashName}/$fsha/$fname" + echo "Retrieving ${url}" + curl -L ${QUIET} -H Pragma: -o "${fname}" -R -S --fail --retry 5 "${url}" && break done else echo "${fname} exists. skipping" From 04de7706694388e2d43b0d628c3e6d728978082a Mon Sep 17 00:00:00 2001 From: Pierre-Yves Chibon Date: Mar 03 2022 08:53:43 +0000 Subject: [PATCH 2/3] Adjust get_sources.sh to allow empty `sources` file There are situations in which one will want to use a `sources` file to indicate that they wish to use the flat dist-git layout. However, until now we did not allow an empty `sources` file in the flat dist-git layout. With this change we allow an empty `sources` file: the script will just echo a message in the logs saying that this file is empty and skip the download and checksum steps. 
Signed-off-by: Pierre-Yves Chibon --- diff --git a/get_sources.sh b/get_sources.sh old mode 100755 new mode 100644 index 3ed0466..ea1b3bc --- a/get_sources.sh +++ b/get_sources.sh @@ -146,52 +146,56 @@ else done <<< "$(git branch -r --contains HEAD | sed 's#origin/##g')" fi -if [[ -s sources ]]; then - # This section is for the "flat" dist-git layout, where the spec file and - # patches are all present at the top level directory and the sha of the tarball - # present in a 'sources' file. - # This code was re-used from the fedpkg-pkg minimal project which is licensed - # under GPLv3 or any later version. +if [[ -f sources ]]; then + if [[ ! -s sources ]]; then + echo "Empty sources file -- nothing to check" + else + # This section is for the "flat" dist-git layout, where the spec file and + # patches are all present at the top level directory and the sha of the tarball + # present in a 'sources' file. + # This code was re-used from the fedpkg-pkg minimal project which is licensed + # under GPLv3 or any later version. - pkgname=$(basename "$PWD") - # Read first word of first line. For old MD5 format it's the 32 character - # hash. Otherwise let's assume the sources have the BSD format where lines - # start with hash type. - hashtype="$(head -n1 sources | cut -d' ' -f1 | tr '[:upper:]' '[:lower:]')" - # The format is - # SHA512 (filename) = ABCDEF - # We don't care about the equals sign. We also assume all hashes are - # the same type, so we don't need to read it again for each line. - while read -r _ filename _ hash || [[ -n "$filename" && -n "$hash" ]]; do - if [ -z "$filename" ] || [ -z "$hash" ]; then - continue - fi - # Remove parenthesis around tarball name - filename=${filename#(} - tarball=${filename%)} - if [ ! 
-e "$tarball" ]; then - for br in "${branches[@]}" - do - br=$(echo ${br}| sed -e s'|remotes/origin/||') - # Try the branch-specific lookaside structure - url="${SURL}/$pkgname/${br}/$hash" - echo "Retrieving ${url}" - HTTP_CODE=$(curl -L ${QUIET} -H Pragma: -o "./$tarball" -R -S --fail --retry 5 "${url}" --write-out "%{http_code}" || true) - echo "Returned ${HTTP_CODE}" - if [[ ${HTTP_CODE} -gt 199 && ${HTTP_CODE} -lt 300 ]] ; then - echo "bailing" - break - fi - # Try the hash-specific lookaside structure - url="${SURL}/$pkgname/$tarball/$hashtype/$hash/$tarball" - echo "Retrieving ${url}" - curl -L ${QUIET} -H Pragma: -o "./$tarball" -R -S --fail --retry 5 "${url}" && break - done - else - echo "$filename exists. skipping" - fi - done < sources - "${hashtype}sum" -c sources + pkgname=$(basename "$PWD") + # Read first word of first line. For old MD5 format it's the 32 character + # hash. Otherwise let's assume the sources have the BSD format where lines + # start with hash type. + hashtype="$(head -n1 sources | cut -d' ' -f1 | tr '[:upper:]' '[:lower:]')" + # The format is + # SHA512 (filename) = ABCDEF + # We don't care about the equals sign. We also assume all hashes are + # the same type, so we don't need to read it again for each line. + while read -r _ filename _ hash || [[ -n "$filename" && -n "$hash" ]]; do + if [ -z "$filename" ] || [ -z "$hash" ]; then + continue + fi + # Remove parenthesis around tarball name + filename=${filename#(} + tarball=${filename%)} + if [ ! 
-e "$tarball" ]; then + for br in "${branches[@]}" + do + br=$(echo ${br}| sed -e s'|remotes/origin/||') + # Try the branch-specific lookaside structure + url="${SURL}/$pkgname/${br}/$hash" + echo "Retrieving ${url}" + HTTP_CODE=$(curl -L ${QUIET} -H Pragma: -o "./$tarball" -R -S --fail --retry 5 "${url}" --write-out "%{http_code}" || true) + echo "Returned ${HTTP_CODE}" + if [[ ${HTTP_CODE} -gt 199 && ${HTTP_CODE} -lt 300 ]] ; then + echo "bailing" + break + fi + # Try the hash-specific lookaside structure + url="${SURL}/$pkgname/$tarball/$hashtype/$hash/$tarball" + echo "Retrieving ${url}" + curl -L ${QUIET} -H Pragma: -o "./$tarball" -R -S --fail --retry 5 "${url}" && break + done + else + echo "$filename exists. skipping" + fi + done < sources + "${hashtype}sum" -c sources + fi else # This section is for the "non-flat" dist-git layout, where the spec file # is stored in a SPECS folder, the patches in a SOURCES folder and the sha From 5480ad2988d1325a0f220012e4cad724374c42ce Mon Sep 17 00:00:00 2001 From: Pierre-Yves Chibon Date: Mar 03 2022 08:56:28 +0000 Subject: [PATCH 3/3] Add a little debugging info to the `get_sources` script This will tell which part of the script is being considered when it is retrieving the sources from the lookaside cache. Signed-off-by: Pierre-Yves Chibon --- diff --git a/get_sources.sh b/get_sources.sh index ea1b3bc..daa1bfc 100644 --- a/get_sources.sh +++ b/get_sources.sh @@ -147,6 +147,7 @@ else fi if [[ -f sources ]]; then + echo "Flat layout style" if [[ ! -s sources ]]; then echo "Empty sources file -- nothing to check" else @@ -197,6 +198,7 @@ if [[ -f sources ]]; then "${hashtype}sum" -c sources fi else + echo "Exploded SRPM layout style" # This section is for the "non-flat" dist-git layout, where the spec file # is stored in a SPECS folder, the patches in a SOURCES folder and the sha # of the tarball of the project is present in a '..metadata' file.