Skip to content

Crawl cf.gov

Crawl cf.gov #1453

Workflow file for this run

# Scheduled crawl of the site named in DOMAIN. Each run replaces the previous
# crawl results, post-processes the crawled HTML, compresses the crawl logs,
# and commits everything back to this repository.
name: Crawl cf.gov

# Triggers the workflow every day at 6AM UTC (1AM EST, 2AM EDT).
# workflow_dispatch additionally allows the workflow to be started manually.
on:
  schedule:
    - cron: '0 6 * * *'
  workflow_dispatch:

env:
  # Host to crawl. The same value is used as the path that is removed before
  # the crawl and added to git after it.
  DOMAIN: www.consumerfinance.gov

jobs:
  build:
    runs-on: ubuntu-latest
    # The last step pushes a commit, so the job token needs write access to
    # repository contents.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4

      - name: Remove previous crawl results
        # -f keeps the step from failing when no previous results exist.
        run: rm -rf -- "$DOMAIN"

      - name: Run the crawl script
        # NOTE(review): the meaning of "-d 0" is defined by crawl.sh, which is
        # not visible here -- confirm before changing it.
        run: ./crawl.sh -d 0 "https://$DOMAIN"

      - name: Simplify crawled HTML to prevent unimportant diffs
        run: postprocessing/transform_results.sh "$DOMAIN"
        # Best effort: a post-processing failure must not stop the commit.
        continue-on-error: true

      - name: Prepare files for git commit
        run: |
          gzip -f *.log
          git add *.log.gz
          git add "$DOMAIN"

      - name: Prepare commit message
        run: ./generate_summary.sh "$DOMAIN" > commit.txt

      - name: Commit crawl results back to GitHub
        run: |
          # NOTE(review): the address in the reviewed copy was obfuscated
          # ("[email protected]"); the GitHub Actions no-reply address is
          # used here -- confirm it matches the intended committer identity.
          git config user.email actions@users.noreply.github.com
          git config user.name Automated
          # git commit exits non-zero when nothing is staged; end the step
          # successfully instead of failing the run.
          if git diff --cached --quiet; then
            echo "No changes to commit"
            exit 0
          fi
          git commit -F commit.txt
          git push
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}