-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathscript.sh
executable file
·65 lines (51 loc) · 1.51 KB
/
script.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env bash
# SPDX-License-Identifier: Apache-2.0
# Copyright 2024 The Linux Foundation
# OS-Climate / Data Extraction Team
# Script repository/location:
# https://github.com/os-climate/osc-data-extraction-scripts
### Bulk execution script ###
# A pipeline reports failure when any of its stages fails
set -o pipefail
# Uncomment to trace execution while debugging
# set -vx

### Variables

# Directory holding the input PDF files (on the EFS/NFS mount)
SOURCE='inputs'
# Glob pattern that picks which files inside SOURCE get processed
SELECTION='e15*.pdf'
### Functions
#######################################
# Handle a single input file.
# Arguments: $1 - path of the file to process
# Outputs:   writes "Processing: <path>" to stdout
# Returns:   exit status of the trailing sleep
#######################################
_process_files() {
  local target=$1
  printf 'Processing: %s\n' "$target"
  # NOTE(review): the fixed delay looks like a stand-in for real work - confirm
  sleep 3
}
# Export the function so the shells started by GNU parallel can call it
declare -fx _process_files
# Resolve nproc with the shell builtin `command -v`; `which` is an external,
# non-standard tool that is not guaranteed to be installed.
NPROC_CMD=$(command -v nproc)
if [[ ! -x "$NPROC_CMD" ]]; then
  # Diagnostics belong on stderr so they are not mistaken for regular output
  echo "Error: nproc command not found in PATH" >&2
  exit 1
fi
# Determined dynamically, but can be hard-wired to a fixed value
# Alternatively, the number available to Docker can be capped
# The path is quoted so a location containing spaces still executes
THREADS=$("$NPROC_CMD")
# Start-up banner: team name followed by the script title, one per line
printf '%s\n' \
  "OS-Climate / Data Extraction Team" \
  "Bulk execution script"
# When /etc/localtime is missing or empty, point it at the Europe/London zone
[ -s /etc/localtime ] || {
  echo "Setting timezone"
  ln -fs /usr/share/zoneinfo/Europe/London /etc/localtime
}
# GNU parallel drives the batch run; install it on demand when it is absent.
# `command -v` is the shell builtin replacement for the external `which`.
if ! command -v parallel > /dev/null 2>&1; then
  echo "Installing GNU parallel"
  apt-get update -qq
  # -y is stated explicitly (quiet level 2 already implies it)
  apt-get install -qq -y parallel > /dev/null 2>&1
  # The install output is silenced, so verify the result instead of
  # continuing into a batch run that cannot start
  if ! command -v parallel > /dev/null 2>&1; then
    echo "Error: failed to install GNU parallel" >&2
    exit 1
  fi
fi
# Run _process_files over every file matching $SOURCE/$SELECTION using
# $THREADS parallel jobs, then report the elapsed wall-clock time.
echo "Parallel threads for batch processing: $THREADS"
START=$(date '+%s')

# Collect the inputs with a glob instead of parsing `ls` output. With nullglob
# a pattern without matches expands to an empty list, so the count below is 0
# rather than 1 and no job is started with an empty file name.
shopt -s nullglob
# SELECTION stays unquoted on purpose so that its wildcard is expanded
# shellcheck disable=SC2206
FILES=("$SOURCE"/$SELECTION)
shopt -u nullglob

echo "Input files to process: ${#FILES[@]}"
if (( ${#FILES[@]} > 0 )); then
  # NUL-delimited hand-over keeps file names with spaces or newlines intact
  printf '%s\0' "${FILES[@]}" | parallel -0 -j "$THREADS" _process_files
else
  echo "Warning: no files matched $SOURCE/$SELECTION" >&2
fi

END=$(date '+%s')
ELAPSED=$((END - START))
echo "Elapsed time in seconds: $ELAPSED"
echo "Batch job completed!"