forked from mediamicroservices/mm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
verifytree
executable file
·188 lines (175 loc) · 20.7 KB
/
verifytree
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#!/bin/bash
# verifytree: writes a temporary XML listing of each package directory and
# checks that listing against the expected directory structure.
# The listing is queried with xmlstarlet; tree.xml itself is excluded when the
# temporary listing is created.

# Load the shared helper functions that live next to this script.
SCRIPTDIR="$(dirname "${0}")"
if ! . "${SCRIPTDIR}/mmfunctions" ; then
    echo "Missing '${SCRIPTDIR}/mmfunctions'. Exiting."
    exit 1
fi

# Start from a clean list, then declare the external tools this script names.
# NOTE(review): the `tree` command is also invoked later in this file but is
# not listed here - confirm whether it should be added.
unset DEPENDENCIES
DEPENDENCIES=(xmlstarlet)
# _runtest: run one check command and warn depending on whether it printed
# anything.
#
# Usage:     _runtest [-i] LABEL COMMAND [ARG...]
# Options:   -i  invert the check: warn when COMMAND prints nothing.
#                Without -i, warn when COMMAND prints something.
# Arguments: LABEL    message handed to _report when the check trips
#            COMMAND  command (with arguments) whose stdout is captured
# Globals:   OPTIND is reset to 1 so getopts always starts at the first
#            argument; all other working variables are local.
# Outputs:   warnings through _report (not defined in this file; presumably
#            provided by the sourced mmfunctions). If COMMAND exits non-zero,
#            a "#fail" line, the exit code and the captured output are
#            emitted as well, and the normal check is still evaluated.
_runtest(){
    # Keep working variables out of the caller's namespace.
    local OPT INVERT_TEST LABEL RUN_ERR XMLRESULT
    OPTIND=1
    INVERT_TEST="N"
    while getopts ":i" OPT ; do
        case "${OPT}" in
            i) INVERT_TEST="Y" ;;
            *) ;; # unrecognised options are silently ignored
        esac
    done
    shift "$(( OPTIND - 1 ))"
    LABEL="${1}"
    shift
    # Declared above so that ${?} reflects the command, not `local`.
    XMLRESULT=$("${@}") #the result of the xpath statement
    RUN_ERR="${?}"
    if [[ "${RUN_ERR}" != 0 ]] ; then
        _report "${LABEL} #fail" #the error message associated with each xpath query
        _report -wts "Error: Running: \"${*}\" gave an Error Code - ${RUN_ERR}"
        # printf rather than echo: the captured text is arbitrary data.
        printf '%s\n' "${XMLRESULT}"
    fi
    # Normal mode trips on any output; inverted mode trips on no output.
    if [[ -n "${XMLRESULT}" && "${INVERT_TEST}" != "Y" ]] || [[ -z "${XMLRESULT}" && "${INVERT_TEST}" == "Y" ]] ; then
        _report -w "${LABEL}" #the error message associated with each xpath query
        if [[ -n "${XMLRESULT}" ]] ; then
            _report "${XMLRESULT}"
        fi
    fi
}
#verify every package named on the command line, one at a time
#arguments that are not directories are skipped without a message
while [[ "${#}" -gt 0 ]] ; do
    PACKAGE="${1}"
    shift
    if [ -d "${PACKAGE}" ] ; then
        #OUTPUTDIR is not read again in this file
        #NOTE(review): presumably consumed by the sourced helper functions - confirm
        OUTPUTDIR="${PACKAGE}/metadata"
        #the package directory name is used as the media id in expected file names
        MEDIAID=$(basename "${PACKAGE}")
        _report -dt "running verifytree on ${PACKAGE}" #the name of the package being verified
        #write an xml listing of the package (leaving out tree.xml and .DS_Store) to a temp file
        TEMPTREE=$(_maketemp)
        tree -DaNXs --du --timefmt "%Y-%m-%dT%H:%M:%SZ" -I "tree.xml|.DS_Store" "${PACKAGE}" > "${TEMPTREE}"
        #every test below runs an xpath query against that temp listing
        #find the most common extension among files in objects and objects/restoration
        MOSTCOMMONEXTENSION="$(xmlstarlet sel -t -m "/tree/directory/directory[@name='objects']/file|/tree/directory/directory[@name='objects']/directory[@name='restoration']/file" -v "substring-after(@name, '.')" -n "${TEMPTREE}" | while read -r j ; do echo "${j##*.}" ; done | sort | uniq -c | sort -n -r | head -n 1 | awk '{print $2}')"
        #the package type is chosen from the lowercased extension so that case does not matter
        MOSTCOMMONEXTENSION_LOWER="$(echo "${MOSTCOMMONEXTENSION}" | awk '{print tolower($0)}')"
        #extensions are wrapped in | so that only whole extensions match
        AUDIO_EXTENSION_LIST="|mp3|aif|aiff|wav|m4a|"
        if [[ "${AUDIO_EXTENSION_LIST}" =~ "|${MOSTCOMMONEXTENSION_LOWER}|" ]] ; then
            _report -dt "running verifytree in audio mode on ${PACKAGE}" #the name of the package being verified
            # Here are tests for audio packages specifically
            #makes sure there is a podcast directory
            _runtest -i "This package is missing the podcast directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='podcast']/@name" -n "${TEMPTREE}"
            #makes sure there is a mp3 directory
            _runtest -i "This package is missing the mp3 directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='mp3']/@name" -n "${TEMPTREE}"
            #looks in logs directory for any files whose names do not end in xlsx, txt, log, or xls
            _runtest "There are files in the logs directory that do not belong!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='logs']/file[substring(@name,string-length(@name)-3)!='xlsx' and substring(@name,string-length(@name)-2)!='txt' and substring(@name,string-length(@name)-2)!='log' and substring(@name,string-length(@name)-2)!='xls']/@name" -n "${TEMPTREE}"
            #looks in fileMeta access for unexpected directories that are not mp3, podcast, or showwaves
            _runtest "There are directories in the fileMeta access directory that do not belong!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='fileMeta']/directory[@name='objects']/directory[@name='access']/directory[@name!='mp3' and @name!='podcast' and @name!='showwaves']/@name" -n "${TEMPTREE}"
            #looks for files in podcast directory other than MEDIAID_podcast.m4a
            _runtest "There are unexpected files in the podcast directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='podcast']/file[@name!='${MEDIAID}_podcast.m4a']/@name" -n "${TEMPTREE}"
            #looks for any directories within access directory that are not mp3, showwaves, or podcast
            _runtest "There are unexpected directories in the access directory." xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name!='mp3' and @name!='showwaves' and @name!='podcast']/@name" -n "${TEMPTREE}"
            #looks for audiograph file in depictions directory
            _runtest -i "There is no audiograph file in the depictions directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='depictions']/file[@name='${MEDIAID}_audiographs.png']/@name" -n "${TEMPTREE}"
            #looks for waveform file in depictions directory
            _runtest -i "There is no waveform file in the depictions directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='depictions']/file[@name='${MEDIAID}_waveform.png']/@name" -n "${TEMPTREE}"
        elif [[ "${MOSTCOMMONEXTENSION_LOWER}" = "tif" ]] ; then
            _report -dt "running verifytree in text mode on ${PACKAGE}" #the name of the package being verified
            # Here are tests for text packages specifically
            #makes sure there is a pdf_1 directory
            #NOTE(review): this test used to be run twice in a row with identical arguments, printing the same warning twice
            #if the second copy was meant to require a txt_1 directory, add that test here
            _runtest -i "This package is missing the pdf_1 directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='pdf_1']/@name" -n "${TEMPTREE}"
            #looks in logs directory for any files whose names do not end in log, txt, jpeg, .gz, or md5
            _runtest "There are files in the logs directory that do not belong!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='logs']/file[substring(@name,string-length(@name)-2)!='log' and substring(@name,string-length(@name)-2)!='txt' and substring(@name,string-length(@name)-3)!='jpeg' and substring(@name,string-length(@name)-2)!='.gz' and substring(@name,string-length(@name)-2)!='md5']/@name" -n "${TEMPTREE}"
            #looks in fileMeta access for unexpected directories that are not pdf_1 or txt_1
            _runtest "There are directories in the fileMeta access directory that do not belong!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='fileMeta']/directory[@name='objects']/directory[@name='access']/directory[@name!='pdf_1' and @name!='txt_1']/@name" -n "${TEMPTREE}"
            #looks for any directories within access directory that are not pdf_1 or txt_1
            _runtest "There are unexpected directories in the access directory." xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name!='pdf_1' and @name!='txt_1']/@name" -n "${TEMPTREE}"
        else
            _report -dt "running verifytree in video mode on ${PACKAGE}" #the name of the package being verified
            # Here are tests for every package that is neither audio nor text (video mode is the fallback)
            #makes sure there is a service directory
            _runtest -i "This package is missing the service directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='service']/@name" -n "${TEMPTREE}"
            #makes sure there is a youtube_up directory
            _runtest -i "This package is missing the youtube directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='youtube_up']/@name" -n "${TEMPTREE}"
            #looks in logs directory for any files whose names do not end in log, txt, qctools.mkv, jpeg, .gz, or md5
            _runtest "There are files in the logs directory that do not belong!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='logs']/file[substring(@name,string-length(@name)-2)!='log' and substring(@name,string-length(@name)-2)!='txt' and substring(@name,string-length(@name)-10)!='qctools.mkv' and substring(@name,string-length(@name)-3)!='jpeg' and substring(@name,string-length(@name)-2)!='.gz' and substring(@name,string-length(@name)-2)!='md5']/@name" -n "${TEMPTREE}"
            #looks in fileMeta access for unexpected directories that are not dvd, mp3, podcast, resourcespace, youtube_up, pdf_1, or txt_1
            _runtest "There are directories in the fileMeta access directory that do not belong!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='fileMeta']/directory[@name='objects']/directory[@name='access']/directory[@name!='dvd' and @name!='mp3' and @name!='podcast' and @name!='resourcespace' and @name!='youtube_up' and @name!='pdf_1' and @name!='txt_1']/@name" -n "${TEMPTREE}"
            #looks for files in podcast directory other than MEDIAID_podcast.mov
            _runtest "There are unexpected files in the podcast directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='podcast']/file[@name!='${MEDIAID}_podcast.mov']/@name" -n "${TEMPTREE}"
            #looks for any directories within access directory that are not dvd, images, mp3, podcast, resourcespace, or youtube_up
            _runtest "There are unexpected directories in the access directory." xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name!='dvd' and @name!='images' and @name!='mp3' and @name!='podcast' and @name!='resourcespace' and @name!='youtube_up']/@name" -n "${TEMPTREE}"
        fi
        #the remaining tests run for every package type
        #searching for upper-level directories
        #looks for any directories that are not objects or metadata or tmp (for digitized materials)
        _runtest "There are directories that are not objects, metadata, or tmp in this package." xmlstarlet sel -t -v "/tree/directory/directory[@name!='objects' and @name!='metadata' and @name!='tmp']/@name" -n "${TEMPTREE}"
        #makes sure there is an objects directory
        _runtest -i "This package is missing an objects directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/@name" -n "${TEMPTREE}"
        #makes sure there is a metadata directory
        _runtest -i "This package is missing a metadata directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/@name" -n "${TEMPTREE}"
        #looks for directories that contain neither a file nor a directory
        _runtest "This package has empty directories!" xmlstarlet sel -t -v "/tree/directory//directory[not(directory|file)]/@name" -n "${TEMPTREE}"
        #begin search of metadata
        #looks for any unexpected files in the upper level metadata directory -- anything other than checksum.md5, checksumchecks.log, dfxml.xml, mets.xml, or a name containing checksum or dfxml
        _runtest "There are files in the metadata directory that do not belong!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/file[@name!='checksum.md5' and @name!='checksumchecks.log' and @name!='dfxml.xml' and @name!='mets.xml' and not(contains(@name,'checksum')) and not(contains(@name,'dfxml'))]/@name" -n "${TEMPTREE}"
        #makes sure a checksum.md5 is in the package
        _runtest -i "This package needs a checksum.md5!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/file[@name='checksum.md5']/@name" -n "${TEMPTREE}"
        #begins search of metadata subdirectories, looks for directories other than depictions, fileMeta, frameMD5s, logs, or fingerprints
        _runtest "There are directories in the metadata directory that do not belong!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name!='depictions' and @name!='fileMeta' and @name!='frameMD5s' and @name!='logs' and @name!='fingerprints']/@name" -n "${TEMPTREE}"
        #makes sure there is a capture.log in the logs directory
        _runtest -i "This package needs a capture.log!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='logs']/file[@name='capture.log']/@name" -n "${TEMPTREE}"
        #makes sure there is a fileMeta directory
        _runtest -i "This package needs a fileMeta directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='fileMeta']/@name" -n "${TEMPTREE}"
        #looks for any directories in fileMeta that are not objects
        _runtest "There are directories in fileMeta that are not objects." xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='fileMeta']/directory[@name!='objects']/@name" -n "${TEMPTREE}"
        #looks for any directories in fileMeta objects subdirectory that are not access, reformatted, service, trimmed_materials, restoration, or captions
        _runtest "There are directories in the fileMeta objects directory that are not access or service." xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='fileMeta']/directory[@name='objects']/directory[@name!='access' and @name!='reformatted' and @name!='service' and @name!='trimmed_materials' and @name!='restoration' and @name!='captions']/@name" -n "${TEMPTREE}"
        #and any files in fileMeta objects whose names do not end in xml, txt, or json
        _runtest "There are files in the fileMeta objects directory that do not belong!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='fileMeta']/directory[@name='objects']/file[substring(@name,string-length(@name)-2)!='xml' and substring(@name,string-length(@name)-2)!='txt' and substring(@name,string-length(@name)-3)!='json']/@name" -n "${TEMPTREE}"
        #looks in fileMeta service subdirectory for files whose names do not end in xml, txt, or json
        _runtest "There are files in the fileMeta service directory that do not belong!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='fileMeta']/directory[@name='objects']/directory[@name='service']/file[substring(@name,string-length(@name)-2)!='xml' and substring(@name,string-length(@name)-2)!='txt' and substring(@name,string-length(@name)-3)!='json']/@name" -n "${TEMPTREE}"
        #looks in the directories directly under fileMeta access for files whose names do not end in xml, txt, or json
        _runtest "There are unexpected metadata files in fileMeta access subdirectories!" xmlstarlet sel -t -v "/tree/directory/directory[@name='metadata']/directory[@name='fileMeta']/directory[@name='objects']/directory[@name='access']/directory/file[substring(@name,string-length(@name)-2)!='xml' and substring(@name,string-length(@name)-2)!='txt' and substring(@name,string-length(@name)-3)!='json']/@name" -n "${TEMPTREE}"
        #begin search of objects
        #looks for any directories within the objects directory that are not access, reformatted, service, trimmed_materials, captions, or restoration
        _runtest "There are directories other than access and service in the objects directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name!='access' and @name!='reformatted' and @name!='service' and @name!='trimmed_materials' and @name!='captions' and @name!='restoration']/@name" -n "${TEMPTREE}"
        #makes sure there is at least one file directly in the objects directory
        _runtest -i "There isn't an object in the objects directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/file[count(@name)>0]/@name" -n "${TEMPTREE}"
        #looks for files in youtube_up directory other than MEDIAID.mp4 or MEDIAID_SEG*.mp4
        _runtest "There are unexpected files in the youtube_up directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='youtube_up']/file[@name!='${MEDIAID}.mp4' and not(starts-with(@name,'${MEDIAID}_SEG') and substring(@name,string-length(@name)-3)='.mp4')]/@name" -n "${TEMPTREE}"
        #looks for unexpected directories in youtube_up directory
        _runtest "There are unexpected directories in the youtube_up directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='youtube_up']/directory[count(@name)>0]/@name" -n "${TEMPTREE}"
        #looks for files in dvd directory other than MEDIAID.iso
        _runtest "There are unexpected files in the dvd directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='dvd']/file[@name!='${MEDIAID}.iso']/@name" -n "${TEMPTREE}"
        #looks for unexpected directories in dvd directory
        _runtest "There are unexpected directories in the dvd directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='dvd']/directory[count(@name)>0]/@name" -n "${TEMPTREE}"
        #looks for files in pdf_1 directory other than MEDIAID.pdf
        _runtest "There are unexpected files in the pdf_1 directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='pdf_1']/file[@name!='${MEDIAID}.pdf']/@name" -n "${TEMPTREE}"
        #looks for unexpected directories in pdf_1 directory
        _runtest "There are unexpected directories in the pdf directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='pdf_1']/directory[count(@name)>0]/@name" -n "${TEMPTREE}"
        #looks for files in txt_1 directory other than MEDIAID.txt
        _runtest "There are unexpected files in the txt_1 directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='txt_1']/file[@name!='${MEDIAID}.txt']/@name" -n "${TEMPTREE}"
        #looks for unexpected directories in txt_1 directory
        _runtest "There are unexpected directories in the txt_1 directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='txt_1']/directory[count(@name)>0]/@name" -n "${TEMPTREE}"
        #looks for files in images directory whose names do not end in tiff
        _runtest "There are unexpected files in the images directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='images']/file[substring(@name,string-length(@name)-3)!='tiff']/@name" -n "${TEMPTREE}"
        #looks for unexpected directories in images directory
        _runtest "There are unexpected directories in the images directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='images']/directory[count(@name)>0]/@name" -n "${TEMPTREE}"
        #looks for files in mp3 directory other than MEDIAID.mp3
        _runtest "There are unexpected files in the mp3 directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='mp3']/file[@name!='${MEDIAID}.mp3']/@name" -n "${TEMPTREE}"
        #looks for unexpected directories in mp3 directory
        _runtest "There are unexpected directories in the mp3 directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='mp3']/directory[count(@name)>0]/@name" -n "${TEMPTREE}"
        #looks for unexpected directories in podcast directory
        _runtest "There are unexpected directories in the podcast directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='access']/directory[@name='podcast']/directory[count(@name)>0]/@name" -n "${TEMPTREE}"
        #looks for files in service directory other than MEDIAID.mov or MEDIAID_SLATE.mov
        _runtest "There are unexpected files in the service directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='service']/file[@name!='${MEDIAID}.mov' and @name!='${MEDIAID}_SLATE.mov']/@name" -n "${TEMPTREE}"
        #looks for files in captions directory whose names do not end in scc
        _runtest "There are unexpected files in the captions directory!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']/directory[@name='captions']/file[substring(@name,string-length(@name)-2)!='scc']/@name" -n "${TEMPTREE}"
        #finds any files whose size is 0
        _runtest "There are empty files in your package!" xmlstarlet sel -t -v "//file[@size='0']/@name" -n "${TEMPTREE}"
        #checks to see if there is an objects directory nested anywhere inside objects
        _runtest "There is another package within this package!" xmlstarlet sel -t -v "/tree/directory/directory[@name='objects']//directory[@name='objects']/@name" -n "${TEMPTREE}"
        #makes sure no file names contain colons
        _runtest "There are improperly named files in this package!" xmlstarlet sel -t -v "//file[contains(@name,':')]/@name" -n "${TEMPTREE}"
        #finds any files whose names start with a dot
        _runtest "There are hidden files in this package!" xmlstarlet sel -t -v "//file[starts-with(@name,'.')]/@name" -n "${TEMPTREE}"
    fi
done