-
Notifications
You must be signed in to change notification settings - Fork 3
/
upload_viollier_raw
executable file
·113 lines (96 loc) · 2.88 KB
/
upload_viollier_raw
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/bin/bash
#
# For every identifier listed in TSVFILE, hard-link the matching sample
# directories into the download area and hand over to ./upload_sftp.

# Settings file that gets sourced further down; -c overrides it.
configfile='viollier.conf'

# Write the synopsis to stderr, then leave the script.
# Arguments: $1 - exit status to terminate with
usage() {
  printf '%s\n' "Usage: $0 [-c <configfile>] [ -s <subdir> ] [ -q ] TSVFILE " >&2
  exit $1
}

# Default destination sub-directory: today's date as YYYY-MM-DD; -s overrides it.
subdir="$(date +%Y-%m-%d)"
# Letter spliced into the option cluster of cp further down; -q empties it.
verbose=v
# Command-line switches:
#   -c FILE  use FILE as settings file (must be readable)
#   -s DIR   destination sub-directory
#   -q       quiet: clear the verbose letter
#   -h       show the synopsis and exit successfully
# Anything else shows the synopsis and exits with status 1.
while getopts 'c:s:qh' o; do
  case "$o" in
    h)
      usage 0
      ;;
    q)
      verbose=
      ;;
    s)
      subdir=$OPTARG
      ;;
    c)
      configfile=$OPTARG
      [[ -r $configfile ]] || {
        echo "Cannot read ${configfile}" >&2
        usage 1
      }
      ;;
    *)
      usage 1
      ;;
  esac
done
# Drop the parsed switches; what remains is the TSVFILE argument.
shift $((OPTIND - 1))

# Load the settings. Only a file given with -c is validated by the option
# loop, so check here as well: this yields a clear message instead of a
# failing `.` followed by an unrelated "parameter null or not set" error.
if [[ ! -r ${configfile} ]]; then
  echo "Cannot read ${configfile}" >&2
  usage 1
fi
. "${configfile}"

# Settings the file has to provide; the script aborts when a mandatory one is
# unset or empty.
# NOTE(review): fileserver and expname are not used by this script itself --
# presumably they are consumed by upload_sftp; confirm.
: "${fileserver:?}"
# ${srvport} is deliberately not enforced
: "${expname:?}"
: "${basedir:=$(pwd)}" # falls back to the current directory
: "${download:?}"
: "${sampleset:?}"

# Destination, relative to ${basedir}/${download}.
target_base=raw_othercenters
target=${target_base}/${subdir}

# Exactly what to process must be given as first positional argument.
(( $# == 0 )) && usage 2
[[ "${1}" == '-h' || "${1}" == '--help' ]] && usage 0

# Both working directories have to exist below ${basedir}.
if [[ ! -d "${basedir}/${download}/" ]]; then
  echo "missing directory ${download}" >&2
  exit 1
elif [[ ! -d "${basedir}/${sampleset}/" ]]; then
  echo "missing sampleset ${sampleset}" >&2
  exit 1
fi

if [[ ! -r "${1}" ]]; then
  echo "Cannot read file ${1}" >&2
  exit 2
fi
# Stage the data for every line of the TSV file, then hand over to upload_sftp.
#
# TSV columns (whitespace separated): <auf> [<ethid>] [ignored ...]
#   <auf>    must start with at least 8 digits; that digit prefix names the
#            destination directory ${download}/${target}/<digits>/
#   <ethid>  optional; must start with at least 6 digits. When absent, it is
#            looked up in ${sampleset}/samples.*.tsv and, failing that, in
#            ${sampleset}/missing.*.txt (only to report the entry).
#
# Exit status: 1 when any line could not be parsed or any copy failed (nothing
# is uploaded in that case); otherwise whatever upload_sftp returns.

# Resolve the TSV path before changing directory; otherwise a relative path
# that passed the readability check would be looked up below ${basedir}.
tsvfile=${1}
[[ ${tsvfile} == /* ]] || tsvfile=${PWD}/${tsvfile}

mkdir -p "${basedir}/${download}/${target}" || exit 1
cd "${basedir}" || {
  echo "cannot change to ${basedir}" >&2
  exit 1
}

error=0
while read -r aufxx ethid rest; do
  # remove DOS/Windows-style CR+LF line ending; on a two-column line the CR
  # ends up attached to the second column, so strip it there as well
  auf=${aufxx%%$'\r'}
  ethid=${ethid%%$'\r'}

  # skip empty lines
  if [[ -z ${auf} ]]; then
    continue
  elif [[ ! ${auf} =~ ^[[:digit:]]{8,} ]]; then
    echo "cannot parse ${auf}" >&2
    error=1
    continue
  fi
  # leading run of digits matched above
  aufnum=${BASH_REMATCH[0]}

  if [[ -z ${ethid} ]]; then
    # At the start of a line: an optional "<prefix>_", then the first column,
    # then everything up to the next tab.
    rx="^([[:digit:]]*[[:alpha:]]+[[:alnum:]-]+_)?${auf}[^\t]*"
    ethid=$(grep -hoP "${rx}" "${sampleset}"/samples.*.tsv)
    if [[ -z ${ethid} ]]; then
      ethid=$(grep -hoP "${rx}" "${sampleset}"/missing.*.txt)
      if [[ -z ${ethid} ]]; then
        echo "(missing ${aufnum})"
      else
        echo "(zero yield for ${aufnum} in ${ethid//$'\n'/ })"
      fi
      continue
    elif [[ ${ethid} == *$'\n'* ]]; then
      echo -e "(${aufnum}: multiple matches: ${ethid//$'\n'/ }\nUploading all matches)"
    fi
  elif [[ ! ${ethid} =~ ^[[:digit:]]{6,} ]]; then
    echo "cannot parse ${ethid}" >&2
    error=1
    continue
  fi

  # start from an empty destination directory
  [[ -d "${download}/${target}/${aufnum}/" ]] \
    && rm -rf -- "${download:?}/${target}/${aufnum:?}/"
  mkdir -p "${download}/${target}/${aufnum}"

  # copy each duplicate sample in a separate subdirectory
  # (${ethid} is left unquoted on purpose: multiple matches are separated by
  # newlines and must be split into individual words)
  for singleethid in ${ethid}; do
    echo "${singleethid} -> ${aufnum}"
    for b in "${sampleset}/${singleethid}"/*; do
      bname=${b##*/}
      # NOTE make sure to distinguish samples by their internal ETHID, in case that they come from the same batch, e.g.:
      #
      # if '34889065' maps to both '34889065_A9' and '34889065_C9' from the same batch '20211203_HTHMGDRXY':
      #  'sampleset/34889065_A9/20211203_HTHMGDRXY' => '...{target}/34889065/20211203_HTHMGDRXY-34889065_A9'
      #  'sampleset/34889065_C9/20211203_HTHMGDRXY' => '...{target}/34889065/20211203_HTHMGDRXY-34889065_C9'
      #
      # --link creates hard links instead of copying the data
      cp --link "-${verbose}rf" -- "${b}" \
        "${download}/${target}/${aufnum}/${bname}-${singleethid}" || error=1
    done
  done
  # the extra echo terminates a last line that lacks its newline
done < <(cat -- "${tsvfile}"; echo)

# Do not upload after a failure, and report the failure to the caller.
(( error )) && exit 1

# NOTE(review): ${configfile} is passed on unchanged; if it is a relative path
# and ${basedir} is not the directory the script was started from, upload_sftp
# will resolve it against ${basedir} -- confirm that this is intended.
exec ./upload_sftp -c "${configfile}" "${target_base}"