-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaction.yml
182 lines (161 loc) · 5.91 KB
/
action.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# Metadata for the Artsdata pipeline composite action.
#
# `mode` selects which steps run:
#   fetch       - run the artsdata-rdf-fetcher container and commit its output
#   push        - POST an artifact description to the Artsdata databus
#   fetch-push  - both of the above, in that order
#
# Only `mode` is declared as required here; the remaining requirements depend
# on the mode and are enforced at run time by the "Check mode requirements"
# step.
#
# Inputs carry no `secret` property: it is not part of the action metadata
# syntax, so it never masked anything. Callers should supply sensitive values
# from their own secrets (e.g. `token: ${{ secrets.GITHUB_TOKEN }}`).
name: 'Custom Artsdata Pipeline Action'
description: 'A custom action for the Artsdata pipeline'
inputs:
  mode:
    description: 'Mode of the action(fetch/push/fetch-push)'
    required: true
    default: 'push'

  # Required in fetch and fetch-push modes.
  page-url:
    description: 'URL of the page to crawl'
  # Required in fetch and fetch-push modes.
  entity-identifier:
    description: 'Identifier of the entity'
  # Required in fetch and fetch-push modes; also sent in the push payload.
  downloadFile:
    description: 'Name of the file to download with extension'
  # Required in push mode. In fetch-push mode the URL is derived from the
  # repository, ref and downloadFile instead.
  downloadUrl:
    description: 'URL of the file to download'
  is-paginated:
    description: 'Whether the page is paginated'
    default: 'false'
  headless:
    description: 'Whether to run in headless mode'
    default: 'false'

  # Required in push and fetch-push modes.
  artifact:
    description: 'Name of the artifact'
  # Required in fetch and fetch-push modes.
  token:
    description: 'GitHub token'
  # Required in push and fetch-push modes.
  publisher:
    description: 'URI of the publisher'
  # Optional; the push step generates a comment from group and version when
  # this is empty.
  comment:
    description: 'Comment'
  # Optional; the push step falls back to the repository name when empty.
  group:
    description: 'Group of artifacts/versions. Typically the name of the tool creating the artifact. Use unreserved characters.'
  # Optional; the push step falls back to the current timestamp when empty.
  version:
    description: 'Version of the artifact. Usually a date. For example: 2020-10-23. Use unreserved characters.'
  # NOTE(review): declared but not referenced by any step in this file.
  reportCallbackUrl:
    description: 'URL to send back the data validation report asynchronously using POST "Content-Type: application/json"'
  # NOTE(review): declared but not referenced by any step in this file.
  shacl:
    description: 'URL to the SHACL file'
  # Optional; the fetch step treats an empty value as 'false'.
  fetch-urls-headlessly:
    description: 'Set as true to fetch the entity URLs headlessly'
  # Optional; the fetch step treats an empty value as '1'.
  offset:
    description: 'Offset for paginated pages'
# Declares this as a composite action; the list of steps it executes follows.
runs:
using: 'composite'
steps:
# Check out the repository so the later steps can write to output/ and commit
# the generated file. v4 takes the same inputs as v3 but runs on a supported
# Node.js runtime (v3 targets the deprecated Node 16).
- name: Check out the repository
  uses: actions/checkout@v4
# Validate that every input needed by the selected mode was supplied, and fail
# fast with a readable message otherwise.
#
# Inputs are handed to the script through environment variables rather than
# being spliced into the script text with ${{ }}: a value containing a quote
# or $(...) would otherwise break the script or be executed by bash.
- name: Check mode requirements
  shell: bash
  env:
    INPUT_MODE: ${{ inputs.mode }}
    INPUT_PAGE_URL: ${{ inputs.page-url }}
    INPUT_ENTITY_IDENTIFIER: ${{ inputs.entity-identifier }}
    INPUT_DOWNLOAD_FILE: ${{ inputs.downloadFile }}
    INPUT_DOWNLOAD_URL: ${{ inputs.downloadUrl }}
    INPUT_TOKEN: ${{ inputs.token }}
    INPUT_ARTIFACT: ${{ inputs.artifact }}
    INPUT_PUBLISHER: ${{ inputs.publisher }}
  run: |
    # require LABEL VALUE: abort the step when VALUE is empty.
    require() {
      if [[ -z "$2" ]]; then
        echo "$1 is required for ${INPUT_MODE} mode."
        exit 1
      fi
    }

    # An unknown mode would match none of the later step conditions and the
    # action would do nothing while still reporting success, so reject it.
    case "$INPUT_MODE" in
      fetch | push | fetch-push) ;;
      *)
        echo "Unsupported mode '${INPUT_MODE}'. Expected fetch, push or fetch-push."
        exit 1
        ;;
    esac

    if [[ "$INPUT_MODE" == "fetch" || "$INPUT_MODE" == "fetch-push" ]]; then
      require "Page URL" "$INPUT_PAGE_URL"
      require "Entity Identifier" "$INPUT_ENTITY_IDENTIFIER"
      require "Download File" "$INPUT_DOWNLOAD_FILE"
      require "Token" "$INPUT_TOKEN"
    fi

    if [[ "$INPUT_MODE" == "push" || "$INPUT_MODE" == "fetch-push" ]]; then
      require "Artifact" "$INPUT_ARTIFACT"
      require "Publisher" "$INPUT_PUBLISHER"
    fi

    # In fetch-push mode the download URL is derived by the push step, so it
    # is only mandatory for a plain push.
    if [[ "$INPUT_MODE" == "push" ]]; then
      require "Download URL" "$INPUT_DOWNLOAD_URL"
    fi
# Run the artsdata-rdf-fetcher container against the requested page and write
# its result to ./output/<downloadFile> on the runner (the host output/
# directory is mounted at /usr/src/app/output inside the container).
#
# All inputs reach the script as environment variables and are expanded inside
# double quotes, so URLs containing shell metacharacters are passed to the
# container verbatim instead of being interpreted by bash.
- name: Fetch Data using Docker
  if: ${{ inputs.mode == 'fetch' || inputs.mode == 'fetch-push' }}
  shell: bash
  env:
    FETCHER_IMAGE: 'ghcr.io/culturecreates/artsdata-pipeline-action/artsdata-rdf-fetcher:latest'
    INPUT_PAGE_URL: ${{ inputs.page-url }}
    INPUT_ENTITY_IDENTIFIER: ${{ inputs.entity-identifier }}
    INPUT_DOWNLOAD_FILE: ${{ inputs.downloadFile }}
    # Empty optional inputs fall back to the same values as before.
    INPUT_IS_PAGINATED: ${{ inputs.is-paginated || 'false' }}
    INPUT_HEADLESS: ${{ inputs.headless || 'false' }}
    INPUT_FETCH_URLS_HEADLESSLY: ${{ inputs.fetch-urls-headlessly || 'false' }}
    INPUT_OFFSET: ${{ inputs.offset || '1' }}
  run: |
    docker pull "$FETCHER_IMAGE"

    # Positional arguments, in the order the fetcher is invoked with:
    #   page URL, entity identifier, output path, is-paginated, headless,
    #   fetch-urls-headlessly, offset
    docker run --shm-size=1g \
      -v "$(pwd)/output:/usr/src/app/output" \
      "$FETCHER_IMAGE" \
      "$INPUT_PAGE_URL" \
      "$INPUT_ENTITY_IDENTIFIER" \
      "output/${INPUT_DOWNLOAD_FILE}" \
      "$INPUT_IS_PAGINATED" \
      "$INPUT_HEADLESS" \
      "$INPUT_FETCH_URLS_HEADLESSLY" \
      "$INPUT_OFFSET"
# Commit the file produced by the fetch step and push it to the repository.
#
# Fixes relative to the previous version:
#   * The path used the undeclared input `file_name`, which expands to an
#     empty string, so the whole output/ directory was staged. It now stages
#     exactly output/<downloadFile>.
#   * `git commit` exits non-zero when nothing is staged, which failed the
#     step whenever the fetched data was unchanged. That case is now skipped.
- name: Commit and Push Changes
  if: ${{ inputs.mode == 'fetch' || inputs.mode == 'fetch-push' }}
  shell: bash
  env:
    GITHUB_TOKEN: ${{ inputs.token }}
    INPUT_DOWNLOAD_FILE: ${{ inputs.downloadFile }}
  run: |
    # NOTE(review): the e-mail value looks like a redacted placeholder;
    # confirm the intended committer address.
    git config --local user.email "[email protected]"
    git config --local user.name "GitHub Actions"

    git pull
    git add "output/${INPUT_DOWNLOAD_FILE}"

    # `git diff --cached --quiet` exits 0 when the index matches HEAD.
    if git diff --cached --quiet; then
      echo "No changes to commit."
    else
      git commit -m "Add data generated by the script"
      git push
    fi
# Describe the artifact as JSON and POST it to the Artsdata databus.
#
# Inputs are passed as environment variables and always expanded in double
# quotes. The previous version assigned them unquoted (e.g.
# `comment=${{ inputs.comment }}`), so any value containing a space was split
# and its remainder executed as a command.
- name: Push to Artsdata
  if: ${{ inputs.mode == 'push' || inputs.mode == 'fetch-push' }}
  shell: bash
  env:
    INPUT_MODE: ${{ inputs.mode }}
    INPUT_ARTIFACT: ${{ inputs.artifact }}
    INPUT_PUBLISHER: ${{ inputs.publisher }}
    INPUT_DOWNLOAD_FILE: ${{ inputs.downloadFile }}
    INPUT_DOWNLOAD_URL: ${{ inputs.downloadUrl }}
    INPUT_GROUP: ${{ inputs.group }}
    INPUT_VERSION: ${{ inputs.version }}
    INPUT_COMMENT: ${{ inputs.comment }}
    REPO: ${{ github.repository }}
    REF: ${{ github.ref }}
  run: |
    # push: use the caller-supplied URL.
    # fetch-push: point at the file committed to this repository.
    if [[ "$INPUT_MODE" == "push" ]]; then
      downloadUrl="$INPUT_DOWNLOAD_URL"
    else
      downloadUrl="https://raw.githubusercontent.com/${REPO}/${REF}/output/${INPUT_DOWNLOAD_FILE}"
    fi

    # Defaults for the optional inputs:
    #   group   -> repository name without the owner prefix
    #   version -> current timestamp with ':' replaced by '_'
    #   comment -> sentence built from group and version
    group="${INPUT_GROUP:-${REPO#*/}}"
    version="${INPUT_VERSION:-$(date +%Y-%m-%dT%H:%M:%S | sed 's/:/_/g')}"
    comment="${INPUT_COMMENT:-Published by ${group} on ${version}}"

    # jq performs the JSON escaping, so values may contain quotes or newlines.
    data=$(jq -n \
      --arg artifact "$INPUT_ARTIFACT" \
      --arg publisher "$INPUT_PUBLISHER" \
      --arg group "$group" \
      --arg version "$version" \
      --arg downloadUrl "$downloadUrl" \
      --arg downloadFile "$INPUT_DOWNLOAD_FILE" \
      --arg comment "$comment" \
      '{
        artifact: $artifact,
        publisher: $publisher,
        group: $group,
        version: $version,
        downloadUrl: $downloadUrl,
        downloadFile: $downloadFile,
        comment: $comment
      }'
    )
    echo "Data: $data"

    # The response body goes to response.txt; curl prints only the HTTP status.
    response=$(curl -s -w "%{http_code}" -o response.txt -X POST http://api.artsdata.ca/databus/ \
      -H "Content-Type: application/json" \
      -d "$data")

    # NOTE(review): a non-200 response is reported but does not fail the step;
    # confirm whether the action should exit non-zero here.
    if [[ "$response" -ne 200 ]]; then
      echo "Error: $(cat response.txt)"
    else
      echo "Success: Data posted successfully."
    fi