# Generate Release
#
# Workflow file for run #1066 of the "Generate Release" workflow.

# Crawls the Chrome Web Store sitemap, derives the extension ids it lists,
# downloads one detail record per id, and publishes three bzip2-compressed
# tarballs (sitemaps, manifests, details) as a GitHub release tagged with the
# current date.
name: Generate Release

on:
  schedule:
    # Once a day at 00:00 (GitHub evaluates cron expressions in UTC).
    - cron: '0 0 * * *'
  workflow_dispatch:

jobs:
  # Job 1: download the sitemap index and every shard it references, then
  # reduce them to plain-text lists of URLs and extension ids.
  sitemaps:
    name: Fetch Sitemaps
    runs-on: ubuntu-22.04
    steps:
      - name: Fetch Sitemap Index
        run: |
          wget \
            --no-verbose \
            --tries 3 \
            --waitretry 10 \
            --retry-on-http-error 500,503 \
            --user-agent "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}" \
            --output-document sitemapindex.xml \
            'https://chrome.google.com/webstore/sitemap'

      # Pull the text of every <loc> element and un-escape "&amp;" so the
      # result is a list of fetchable URLs, one per line.
      - name: Extract Sitemap URLs
        run: |
          grep -F '<loc>' ./sitemapindex.xml |
            awk -F '[<>]' '{ gsub(/&amp;/, "\\&", $3); print $3 }' > sitemapindex.txt

      # Shards carrying an "&hl=" parameter are skipped. The remaining URLs
      # are shuffled and piped in batches to up to 10 concurrent wget
      # processes, each reading its URL list from stdin.
      # NOTE(review): relies on GNU parallel being present on the runner
      # image - confirm after any runner change.
      - name: Fetch Sitemap Shards
        run: |
          sort --random-sort ./sitemapindex.txt |
            grep -v "&hl=" |
            parallel \
              --max-procs 10 \
              --max-replace-args 100 \
              --pipe \
              "wget \
                --no-verbose \
                --tries 3 \
                --waitretry 10 \
                --retry-on-http-error 500,503 \
                --user-agent '${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}' \
                --directory-prefix . \
                --input-file -"

      # The shard files are matched by the literal prefix "sitemap?" that
      # wget gave them; the same <loc> extraction as above is applied.
      - name: Extract URLs
        run: |
          grep -F '<loc>' "sitemap?"* |
            awk -F '[<>]' '{ gsub(/&amp;/, "\\&", $3); print $3 }' > ./urls.txt

      # The id is taken as the first 32 characters of the 7th "/"-separated
      # field. awk strings are 1-indexed, so the substring starts at 1; a
      # start of 0 loses one character in gawk and mawk.
      - name: Extract Extensions
        run: |
          awk -F/ '{ print substr($7, 1, 32) }' ./urls.txt | sort -u > ./extensions.txt

      - name: Upload
        uses: actions/upload-artifact@v4
        with:
          name: sitemaps
          path: "*.txt"
          if-no-files-found: error
          retention-days: 1

  # Job 2: fan out over the first letter of the extension id. Each matrix
  # job handles only the ids that start with its prefix.
  details:
    name: 'Fetch Details (${{ matrix.prefix }})'
    runs-on: ubuntu-22.04
    needs: sitemaps
    strategy:
      max-parallel: 16
      matrix:
        prefix:
          ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
           'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p']
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.9"

      - name: Download URLs
        uses: actions/download-artifact@v4
        with:
          name: sitemaps
          path: sitemaps

      # Build one detail URL per matching id and POST them in batches; each
      # parallel job writes its responses to details.<job number>.json.
      # The trailing "|| true" deliberately ignores download failures so a
      # single failed request does not fail the whole job.
      - name: Fetch Details
        run: |
          awk -F'/' '$0 ~ /^${{ matrix.prefix }}/ {
            print "https://chrome.google.com/webstore/ajax/detail?pv=20210820&id=" $0
          }' ./sitemaps/extensions.txt | parallel \
            --max-procs 10 \
            --max-replace-args 10000 \
            --pipe \
            "wget \
              --no-verbose \
              --tries 3 \
              --waitretry 10 \
              --retry-on-http-error 500,503 \
              --user-agent '${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}' \
              --output-document details.{#}.json \
              --method POST \
              --input-file -" || true

      # extract.py lives in the repository and is not shown here.
      # NOTE(review): presumably it writes manifests-<prefix>.tar and
      # details-<prefix>.tar, since the release job globs for those names -
      # confirm against the script.
      - name: Extract Details
        run: |
          python extract.py ${{ matrix.prefix }}

      # upload-artifact v4 artifacts are immutable, so every matrix job must
      # upload under its own name; the release job merges them on download.
      - name: Upload Manifests
        uses: actions/upload-artifact@v4
        with:
          name: details-${{ matrix.prefix }}
          path: "*.tar"
          if-no-files-found: error
          retention-days: 1

  # Job 3: bundle everything and publish it as a release named after today's
  # date.
  release:
    name: Release
    runs-on: ubuntu-22.04
    needs: details
    steps:
      - name: Download Sitemaps
        uses: actions/download-artifact@v4
        with:
          name: sitemaps
          path: sitemaps

      - name: Compress Sitemaps
        working-directory: sitemaps
        run: |
          tar -cvjSf ../sitemaps.tar.bz2 .

      # Fetch every per-prefix artifact and flatten their files into the one
      # "details" directory that the concatenation steps below glob over.
      - name: Download Manifest
        uses: actions/download-artifact@v4
        with:
          pattern: details-*
          path: details
          merge-multiple: true

      # tar --concatenate is run once per input archive (xargs -n1), joining
      # them into a single manifests.tar.
      - name: Concatenate Manifests
        run: |
          ls -1 details/manifests-*.tar | xargs -n1 tar --concatenate --file manifests.tar

      - name: Compress Manifests
        run: |
          bzip2 manifests.tar

      - name: Concatenate Details
        run: |
          ls -1 details/details-*.tar | xargs -n1 tar --concatenate --file details.tar

      - name: Compress Details
        run: |
          bzip2 details.tar

      # Expose today's date (YYYY-MM-DD) as the step output "date"; it is
      # used below as both the release title and the tag.
      - name: Generate Tag
        id: tag
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"

      - name: Create Release
        env:
          GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }}
        run: |
          gh release create \
            --repo "$GITHUB_REPOSITORY" \
            --title "${{ steps.tag.outputs.date }}" \
            --notes "Action URL: ${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}" \
            "${{ steps.tag.outputs.date }}"

      - name: Upload Artifacts
        env:
          GITHUB_TOKEN: ${{ secrets.RELEASE_TOKEN }}
        run: |
          gh release upload \
            --repo "$GITHUB_REPOSITORY" \
            "${{ steps.tag.outputs.date }}" \
            "sitemaps.tar.bz2" \
            "manifests.tar.bz2" \
            "details.tar.bz2"