-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathfile-line-process.sh
executable file
·73 lines (61 loc) · 2.02 KB
/
file-line-process.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/bash
#
# file-line-process.sh <file> <base-path>
#
# Support script for processing of UTF-8 plain-text files, one line
# at a time. The <file> should contain one sentence per line, and
# words should be delimited by whitespace.
#
# Submit that one file, via perl script, to the parser. When done,
# move the file over to the $COMPLETED_DIR directory.
#
# <file> is the file to process
# <base-path> is the directory in which the test corpora are located.
#
# Example usage:
# ./file-line-process.sh file.txt
# ./file-line-process.sh /home/data/dir/file.txt /home/data
#
# Netcat command line used to probe the cogserver. Netcat builds
# disagree about the -N flag: some require it, others do not know
# it at all. Edit this line to suit the netcat installed locally.
netcat='nc -N'
# Collect the run-time settings: the two positional arguments, plus
# the server host, server port and the OBSERVE value. HOSTNAME, PORT
# and OBSERVE are not assigned anywhere in this script, so they are
# presumably supplied by the calling environment.
filename=$1
basepath=$2
coghost=${HOSTNAME}
cogport=${PORT}
observe=${OBSERVE}
# Give up early if the cogserver is not answering. A throwaway line
# is piped through netcat; a non-zero exit status from netcat is
# taken to mean the server is down. $netcat is deliberately left
# unquoted so that "nc -N" splits into the command and its flag.
if ! haveping=$(printf 'foo\n' | $netcat $coghost $cogport) ; then
  echo "Error: Unable to ping cogserver; not processing file."
  exit 1
fi
# Print the portion of a path that follows a base directory.
#   $1 - full path of the file
#   $2 - base directory that the path is expected to start with
# The leading ${#2}+1 characters of $1 (the base plus the slash that
# follows it) are dropped and the remainder is written to stdout.
# The prefix is removed by length only; it is not verified that $1
# really begins with $2. Bash substring expansion is used so that
# the length and the offset are counted in the same units, and so
# that long or whitespace-containing names pass through unchanged.
path_after_base() {
  local path=$1 base=$2
  local -i skip=$(( ${#base} + 1 ))
  printf '%s\n' "${path:skip}"
}

# Split the filename into two parts: the base path, and the rest of
# the path relative to it.
rest=$(path_after_base "$filename" "$basepath")
echo "$MSG file >>>$rest<<<"
# Remove everything after the last slash in the basepath.
base=${basepath%/*}
# The in-process and completed trees sit next to each other under
# the trimmed base path.
splitdir=${base}/${IN_PROCESS_DIR}
subdir=${base}/${COMPLETED_DIR}
# Create directories if missing. The dirname output is quoted so
# that directory names containing spaces stay a single argument.
mkdir -p "$(dirname "$splitdir/$rest")"
mkdir -p "$(dirname "$subdir/$rest")"
# Copy the article into the in-process directory for the duration of
# the run. Stop right away if the copy fails: otherwise nothing would
# be submitted, yet the cleanup at the end of this script would still
# remove the original file.
if ! cp "$filename" "$splitdir/$rest" ; then
  echo "Error: Unable to copy $filename; not processing file."
  exit 1
fi
# Submit the text, line by line. submit-lines.pl is looked up in the
# directory that holds this script, so the current working directory
# does not matter. The path is quoted so that a script directory with
# spaces in its name still works; the copied article is fed to the
# perl script on stdin.
cwd=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
"$cwd/submit-lines.pl" "$coghost" "$cogport" "$observe" < "$splitdir/$rest"
# Probe the cogserver a second time before touching any files: if it
# went down while the article was being submitted, quit here so that
# the mv and rm below are skipped. $netcat is deliberately left
# unquoted so that "nc -N" splits into the command and its flag.
if ! haveping=$(printf 'foo\n' | $netcat $coghost $cogport) ; then
  echo "Error: Failed to ping cogserver after processing $rest"
  exit 1
fi
# File the in-process copy under the completed directory, then
# remove the article from its location under the base path.
mv "${splitdir}/${rest}" "${subdir}/${rest}"
rm "${basepath}/${rest}"