# Workflow file for run #19:
# "Added data quality measurement and updated workflow to print status"

# On pull requests targeting main: check out a branch, run a placeholder
# generation step, commit the result, open a pull request with the generated
# files, and post a data quality report as a PR comment.
name: Run Parse Fyrliste Script and Create PR

on:
  pull_request:
    branches:
      - main

# Scopes the GITHUB_TOKEN needs below: pushing the generated branch
# (contents) and opening / commenting on pull requests (pull-requests).
permissions:
  contents: write
  pull-requests: write

jobs:
  generate-and-pr:
    runs-on: ubuntu-latest
    steps:
      # Step 1: Check out the repository
      - name: Checkout Repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0  # Needed for creating branches
          # NOTE(review): hard-coded branch name; looks like a test ref.
          # Confirm before relying on this workflow for other branches.
          ref: anders/test-dq

      # Step 2: Set up Python
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      # Step 3: Install dependencies (if any)
      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r scripts/requirements.txt  # If you have dependencies

      # Step 4: Run the Python script
      # - name: Run Script
      #   run: |
      #     python scripts/parse.py
      # Placeholder for the real script: creates an empty file so the commit
      # and pull-request steps have something to work with.
      - name: Run Script
        run: |
          touch hello.py

      # Step 5: Run data quality check
      # - name: Run Data Quality Check
      #   id: quality_check
      #   run: |
      #     python scripts/check_data_quality.py > quality_report.txt

      # Step 6: Configure Git
      - name: Configure Git
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

      # Step 7: Check for changes and commit.
      # Exposes the step output `changes` ('true' / 'false') through
      # $GITHUB_OUTPUT; the `::set-output` command is deprecated.
      - name: Commit Changes
        id: commit_changes
        run: |
          git add hello.py
          if git diff --cached --quiet; then
            echo "No changes to commit."
            echo "changes=false" >> "$GITHUB_OUTPUT"
          else
            git commit -m "Update generated files [skip ci]"
            echo "changes=true" >> "$GITHUB_OUTPUT"
          fi

      # Exports BUILD_TIME (YYYY-MM-DD) to later steps; used in the PR title.
      - name: Store build timestamp
        run: echo "BUILD_TIME=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"

      # Step 8: Create Pull Request if there are changes
      - name: Create Pull Request
        id: create_pr
        if: steps.commit_changes.outputs.changes == 'true'
        uses: peter-evans/create-pull-request@v5
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: Update generated files
          branch: update-fyrliste-${{ github.run_number }}
          title: "Updated Fyrliste to ${{ env.BUILD_TIME }}"
          body: |
            This PR updates the generated files based on the latest run.
          labels: automated-pr
          # You can specify the base branch if different from the default
          base: main

      # Step 9: Add data quality report as PR comment.
      # The comment goes to the pull request that triggered the run; if the
      # event payload carries none, it falls back to the pull request created
      # in step 8. Posting is best-effort: failures are reported as a warning
      # and do not fail the job.
      # NOTE(review): the step that writes quality_report.txt is commented
      # out above and no visible step writes lighthouses_with_problems.csv,
      # so the reads below are expected to fail until those are restored.
      - name: Comment on PR with Data Quality Report
        if: steps.commit_changes.outputs.changes == 'true'
        uses: actions/github-script@v6
        env:
          CREATED_PR_NUMBER: ${{ steps.create_pr.outputs.pull-request-number }}
        with:
          script: |
            const fs = require('fs');

            // List files in current directory for debugging
            const files = fs.readdirSync('.');
            console.log('Files in current directory:', files);

            try {
              const csvContent = fs.readFileSync('lighthouses_with_problems.csv', 'utf8');
              const qualityReport = fs.readFileSync('quality_report.txt', 'utf8');

              // Prefer the triggering PR; otherwise use the PR from step 8.
              const prNumber = context.payload.pull_request
                ? context.payload.pull_request.number
                : Number(process.env.CREATED_PR_NUMBER);
              if (!Number.isInteger(prNumber) || prNumber <= 0) {
                throw new Error('Could not determine which pull request to comment on.');
              }

              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body: '## Data Quality Report\n\n' +
                  '### Error Summary\n' + qualityReport + '\n\n' +
                  '### Detailed Problems\n```csv\n' + csvContent + '\n```'
              });
            } catch (error) {
              // Best-effort: surface the problem on the run without failing it.
              core.warning(`Data quality comment was not posted: ${error.message}`);
              console.error('Error posting data quality report:', error);
              console.log('Current working directory:', process.cwd());
            }