#10 Add support for a different lookaside structure
Merged 2 years ago by bstinson. Opened 2 years ago by pingou.
pingou/centos-git-common new_get_sources  into  master

file modified
+74 -47
@@ -77,7 +77,7 @@ 

              shift

             ;;

           -b )

-             # Check this particular branch 

+             # Check this particular branch

              BRANCH=$2

              shift

              shift
@@ -146,43 +146,59 @@ 

    done <<< "$(git branch -r --contains HEAD | sed 's#origin/##g')"

  fi

  

- if [[ -s sources ]]; then

-     # This section is for the "flat" dist-git layout, where the spec file and

-     # patches are all present at the top level directory and the sha of the tarball

-     # present in a 'sources' file.

-     # This code was re-used from the fedpkg-pkg minimal project which is licensed

-     # under GPLv3 or any later version.

+ if [[ -f sources ]]; then

+     echo "Flat layout style"

+     if [[ ! -s sources ]]; then

+       echo "Empty sources file -- nothing to check"

+     else

+       # This section is for the "flat" dist-git layout, where the spec file and

+       # patches are all present at the top level directory and the sha of the tarball

+       # present in a 'sources' file.

+       # This code was re-used from the fedpkg-pkg minimal project which is licensed

+       # under GPLv3 or any later version.

  

-     pkgname=$(basename "$PWD")

-     # Read first word of first line. For old MD5 format it's the 32 character

-     # hash. Otherwise let's assume the sources have the BSD format where lines

-     # start with hash type.

-     hashtype="$(head -n1 sources | cut -d' ' -f1 | tr '[:upper:]' '[:lower:]')"

-     # The format is

-     #   SHA512 (filename) = ABCDEF

-     # We don't care about the equals sign. We also assume all hashes are

-     # the same type, so we don't need to read it again for each line.

-     while read -r _ filename _ hash || [[ -n "$filename" && -n "$hash" ]]; do

-         if [ -z "$filename" ] || [ -z "$hash" ]; then

-             continue

-         fi

-         # Remove parenthesis around tarball name

-         filename=${filename#(}

-         tarball=${filename%)}

-         if [ ! -e "$tarball" ]; then

-           for br in "${branches[@]}"

-           do

-             br=$(echo ${br}| sed -e s'|remotes/origin/||')

-             url="${SURL}/$pkgname/${br}/$hash"

-             echo "Retrieving ${url}"

-             curl -L ${QUIET} -f "${url}" -o "$tarball" && break

-           done

-         else

-           echo "$filename exists. skipping"

-         fi

-     done < sources

-     "${hashtype}sum" -c sources

+       pkgname=$(basename "$PWD")

+       # Read first word of first line. For old MD5 format it's the 32 character

+       # hash. Otherwise let's assume the sources have the BSD format where lines

+       # start with hash type.

+       hashtype="$(head -n1 sources | cut -d' ' -f1 | tr '[:upper:]' '[:lower:]')"

+       # The format is

+       #   SHA512 (filename) = ABCDEF

+       # We don't care about the equals sign. We also assume all hashes are

+       # the same type, so we don't need to read it again for each line.

+       while read -r _ filename _ hash || [[ -n "$filename" && -n "$hash" ]]; do

+           if [ -z "$filename" ] || [ -z "$hash" ]; then

+               continue

+           fi

+           # Remove parenthesis around tarball name

+           filename=${filename#(}

+           tarball=${filename%)}

+           if [ ! -e "$tarball" ]; then

+             for br in "${branches[@]}"

+             do

+               br=$(echo ${br}| sed -e s'|remotes/origin/||')

+               # Try the branch-specific lookaside structure

+               url="${SURL}/$pkgname/${br}/$hash"

+               echo "Retrieving ${url}"

+               HTTP_CODE=$(curl -L ${QUIET} -H Pragma: -o "./$tarball" -R -S --fail --retry 5 "${url}" --write-out "%{http_code}" || true)

+               echo "Returned ${HTTP_CODE}"

+               if [[ ${HTTP_CODE} -gt 199 && ${HTTP_CODE} -lt 300 ]] ; then

+                  echo "bailing"

+                  break

+               fi

+               # Try the hash-specific lookaside structure

+               url="${SURL}/$pkgname/$tarball/$hashtype/$hash/$tarball"

+               echo "Retrieving ${url}"

+               curl -L ${QUIET} -H Pragma: -o "./$tarball" -R -S --fail --retry 5 "${url}" && break

+             done

+           else

+             echo "$filename exists. skipping"

+           fi

+       done < sources

+       "${hashtype}sum" -c sources

+     fi

  else

+     echo "Exploded SRPM layout style"

      # This section is for the "non-flat" dist-git layout, where the spec file

      # is stored in a SPECS folder, the patches in a SOURCES folder and the sha

      # of the tarball of the project is present in a '.<pkg_name>.metadata' file.
@@ -231,21 +247,22 @@ 

          # zero byte file

          touch ${fname}

        else

+         hashType=$(weakHashDetection ${fsha})

+         if [ "${hashType}" == "unknown" ]; then

+           echo 'Failure: Hash type unknown.' >&2

+           exit 1;

+         fi

+         hashName=$(echo ${hashType}| sed -e s'|sum||')

+ 

          if [ ${CHECK} -eq 1 ]; then

-           hashType=$(weakHashDetection ${fsha})

-           if [ "${hashType}" == "unknown" ]; then

-             echo 'Failure: Hash type unknown.' >&2

+           which ${hashType} >/dev/null 2>&1

Do we need to be asking which ${hashType}sum here instead?

+           if [[ $? -ne 0 ]]; then

+             echo "Failure: You need ${hashType} in PATH." >&2

              exit 1;

-           else

-             which ${hashType} >/dev/null 2>&1

-             if [[ $? -ne 0 ]]; then

-               echo "Failure: You need ${hashType} in PATH." >&2

-               exit 1;

-             fi

            fi

          fi

          if [ -e ${fname} -a ${CHECK} -eq 1 ]; then

- 	    # check hash sum and force download if wrong

+             # check hash sum and force download if wrong

              downsum=$(${hashType} ${fname} | awk '{print $1}')

              if [ "${fsha}" != "${downsum}" ]; then

                  rm -f ${fname}
@@ -255,9 +272,19 @@ 

            for br in "${branches[@]}"

            do

              br=$(echo ${br}| sed -e s'|remotes/origin/||')

+             # Try the branch-specific lookaside structure

              url="${SURL}/${pn}/${br}/${fsha}"

              echo "Retrieving ${url}"

-             curl -L ${QUIET} -f "${url}" -o "${fname}" && break

+             HTTP_CODE=$(curl -L ${QUIET} -H Pragma: -o "${fname}" -R -S --fail --retry 5 "${url}" --write-out "%{http_code}" || true)

+             echo "Returned ${HTTP_CODE}"

+             if [[ ${HTTP_CODE} -gt 199 && ${HTTP_CODE} -lt 300 ]] ; then

+                echo "bailing"

+                break

+             fi

+             # Try the hash-specific lookaside structure

+             url="${SURL}/$pn/$fname/${hashName}/$fsha/$fname"

+             echo "Retrieving ${url}"

+             curl -L ${QUIET} -H Pragma: -o "${fname}" -R -S --fail --retry 5 "${url}" && break

            done

          else

            echo "${fname} exists. skipping"

There is a wish to offer SIGs the possibility of using a lookaside
structure similar to the one used by CentOS Stream and Fedora, which is
not tied to the name of the archive but to its hash.
The idea, though, is to offer the new structure as opt-in and thus keep
the old structure working.
We thus need to adjust the get_sources.sh script to support both
structures.
This commit does exactly that: it allows both the exploded-SRPM and flat
dist-git layouts to use either the old or the new lookaside cache structure.
The way this is achieved is simply to first call the URL corresponding to
the old lookaside structure. If that call returns an HTTP code in the 200
range, the script stops; otherwise, the script calls the URL
corresponding to the new lookaside structure.

This commit also makes the different curl calls consistent and adds
--retry 5 to all of them (retries are not triggered by 404 replies, so
using the new structure will not result in 6 requests to the old one
before moving on, but just 1).

Signed-off-by: Pierre-Yves Chibon pingou@pingoured.fr

This PR can be reviewed, merged and deployed but it will only be useful once the work on the new lookaside upload script is also deployed.

@lrossett is working on the lookaside upload script :)

rebased onto 014e389

2 years ago

Commits can be reviewed independently

3 new commits added

  • Add a little debugging info to the `get_sources` script
  • Adjust get_sources.sh to allow empty `sources` file
  • Add support for a different lookaside structure
2 years ago

Can we please get this merged and released?

Do we need to be asking which ${hashType}sum here instead?

Do we need to be asking which ${hashType}sum here instead?

No, we don't, because I see that you hoisted the weakHashDetection call up earlier. Ignore this.

Pull-Request has been merged by bstinson

2 years ago
Metadata