Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/irstlm-team/irstlm
Browse files Browse the repository at this point in the history
Solved conflicts
  • Loading branch information
Nicola Bertoldi authored and Nicola Bertoldi committed Jun 28, 2017
2 parents e66997d + 3715751 commit 3645767
Show file tree
Hide file tree
Showing 19 changed files with 70 additions and 231 deletions.
22 changes: 18 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
#directories
src/CMakeFiles/
CMakeFiles/
build/
inst/
.deps/
.DS_Store/
.idea/
autom4te.cache/

#files
Makefile
Makefile.in
cmake_install.cmake
CMakeCache.txt
*.o
*.lo
.deps
.DS_Store
.idea/
aclocal.m4
ar-lib
autom4te.cache/
compile
config.guess
config.h
Expand All @@ -22,8 +33,11 @@ ltmain.sh
m4
missing
stamp-h1
Makefile
Makefile.in
CMakeFiles
CMakeCache.txt
cmake_install.cmake
8 changes: 4 additions & 4 deletions doc/LMCompilation.tex
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
much as possible on disk rather than in RAM. \\

\begin{verbatim}
$> compile-lm --memmap=1 train.lm train.blm
$> compile-lm train.lm train.blm --memmap=1
\end{verbatim}
\noindent
This option clearly comes at a cost in terms of speed, but is often the only way to proceed. It is also recommended
Expand All @@ -27,7 +27,7 @@
explicitly set the directory used for temporary computation through the
parameter ``--tmpdir''.
\begin{verbatim}
$> compile-lm --tmpdir=<mytmpdir> train.lm train.blm
$> compile-lm train.lm train.blm --tmpdir=<mytmpdir>
\end{verbatim}


Expand All @@ -36,7 +36,7 @@ \subsection{Inverted order of ngrams}
For a faster access, the ngrams can be stored in inverted order with the following two commands:
\begin{verbatim}
$> sort-lm.pl -inv -ilm train.lm -olm train.inv.lm
$> compile-lm train.inv.lm train.inv.blm --invert yes
$> compile-lm train.inv.lm train.inv.blm --invert=yes
\end{verbatim}

\paragraph{Warning:} The following pipeline is no longer allowed!
Expand All @@ -46,7 +46,7 @@ \subsection{Inverted order of ngrams}
}
\begin{verbatim}
$> cat train.lm | sort-lm.pl -inv | \
compile-lm /dev/stdin train.inv.blm --invert yes
compile-lm /dev/stdin train.inv.blm --invert=yes
\end{verbatim}


Expand Down
2 changes: 1 addition & 1 deletion scripts/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ wrapperbindir = @prefix@/bin
dist_wrapperbin_SCRIPTS = \
add-start-end.sh build-lm-qsub.sh build-lm.sh rm-start-end.sh split-ngt.sh mdtsel.sh \
build-sublm.pl goograms2ngrams.pl lm-stat.pl merge-sublm.pl ngram-split.pl sort-lm.pl split-dict.pl \
plsa.sh qplsa.sh
plsa.sh

EXTRA_DIST = wrapper

11 changes: 10 additions & 1 deletion scripts/build-lm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ OPTIONS:
-u|--uniform Use uniform word frequency for dictionary splitting (default false)
-b|--boundaries Include sentence boundary n-grams (optional, default false)
-v|--verbose Verbose
--debug Debug
-h|-?|--help Show this message
EOF
Expand Down Expand Up @@ -56,6 +57,7 @@ parts=3
inpfile="";
outfile=""
verbose="";
debug="";
smoothing="witten-bell";
prune="";
prune_thr_str="";
Expand Down Expand Up @@ -101,6 +103,8 @@ while [ "$1" != "" ]; do
;;
-v | --verbose ) verbose='--verbose';
;;
--debug ) debug='--debug';
;;
-h | -? | --help ) usage;
exit 0;
;;
Expand Down Expand Up @@ -142,7 +146,7 @@ echo "LOGFILE:$logfile"


if [ $verbose ] ; then
echo inpfile='"'$inpfile'"' outfile=$outfile order=$order parts=$parts tmpdir=$tmpdir prune=$prune smoothing=$smoothing dictionary=$dictionary verbose=$verbose prune_thr_str=$prune_thr_str >> $logfile 2>&1
echo inpfile='"'$inpfile'"' outfile=$outfile order=$order parts=$parts tmpdir=$tmpdir prune=$prune smoothing=$smoothing dictionary=$dictionary verbose=$verbose debug=$debug prune_thr_str=$prune_thr_str >> $logfile 2>&1
fi

if [ ! "$inpfile" -o ! "$outfile" ] ; then
Expand Down Expand Up @@ -236,6 +240,11 @@ while [ 1 ]; do fg 2> /dev/null; [ $? == 1 ] && break; done
echo "Merging language models into $outfile" >> $logfile 2>&1
$scr/merge-sublm.pl --size $order --sublm $tmpdir/lm.dict -lm $outfile $backoff >> $logfile 2>&1

# When --debug was requested, keep the temporary directory for inspection and
# stop before the cleanup step that follows.
# "$debug" is quoted because it is empty unless --debug was passed; unquoted,
# an empty value collapses the test to `[ == "--debug" ]`, which makes `[`
# print a "unary operator expected" error on every non-debug run.
if [ "$debug" = "--debug" ] ; then
echo "Debugging is active; hence, not removing temporary directory $tmpdir" >> $logfile 2>&1
exit 0
fi

echo "Cleaning temporary directory $tmpdir" >> $logfile 2>&1
rm $tmpdir/* 2> /dev/null

Expand Down
183 changes: 0 additions & 183 deletions scripts/qplsa.sh

This file was deleted.

4 changes: 2 additions & 2 deletions src/crc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ static const unsigned short crc16tab[256]= {

unsigned short crc16_ccitt(const char *buf, int len)
{
register int counter;
register unsigned short crc = 0;
int counter;
unsigned short crc = 0;
for( counter = 0; counter < len; counter++)
crc = (crc<<8) ^ crc16tab[((crc>>8) ^ *(char *)buf++)&0x00FF];
return crc;
Expand Down
1 change: 0 additions & 1 deletion src/dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ class dictionary
char ifl; //!< increment flag
int dubv; //!< dictionary size upper bound
float load_factor; //!< dictionary loading factor
char* oov_str; //!< oov string

void test(int* OOVchart, int* NwTest, int curvesize, const char *filename, int listflag=0); // prepare into testOOV the OOV statistics computed on test set

Expand Down
8 changes: 4 additions & 4 deletions src/htable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ template <>
address htable<int *>::Hash(int* key)
{
address h;
register int i;
int i;

//Thomas Wang's 32 bit Mix Function
for (i=0,h=0; i<keylen; i++) {
Expand All @@ -71,8 +71,8 @@ address htable<char *>::Hash(char* key)
char *Key = *(char**)key;
int length=strlen(Key);

register address h=0;
register int i;
address h=0;
int i;

for (i=0,h=0; i<length; i++)
h = h * Prime1 ^ (Key[i] - ' ');
Expand All @@ -86,7 +86,7 @@ int htable<int*>::Comp(int *key1, int *key2) const
{
MY_ASSERT(key1 && key2);

register int i;
int i;

for (i=0; i<keylen; i++)
if (key1[i]!=key2[i]) return 1;
Expand Down
2 changes: 1 addition & 1 deletion src/interplm.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ class interplm:public ngramtable
return 0;
}

virtual void adapt(char* /* unused parameter: ngtfile */, double /* unused parameter: w */) {}
virtual void adapt(char* /* unused parameter: ngtfile */, int /* unused parameter: l */, double /* unused parameter: w */) {}

virtual double prob(ngram /* unused parameter: ng */,int /* unused parameter: size */) {
return 0.0;
Expand Down
2 changes: 0 additions & 2 deletions src/linearlm.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ namespace irstlm {
class linearwb: public mdiadaptlm
{
int prunethresh;
int minfreqthresh;
public:
linearwb(char* ngtfile,int depth=0,int prunefreq=0,TABLETYPE tt=SHIFTBETA_B);
int train();
Expand All @@ -40,7 +39,6 @@ namespace irstlm {
class linearstb: public mdiadaptlm
{
int prunethresh;
int minfreqthresh;
public:
linearstb(char* ngtfile,int depth=0,int prunefreq=0,TABLETYPE tt=SHIFTBETA_B);
int train();
Expand Down
Loading

0 comments on commit 3645767

Please sign in to comment.