-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
21 lines (16 loc) · 819 Bytes
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# User-tunable settings; override any of them on the command line,
# e.g. `make WIKILANG=fr`.

# Value handed to WikiExtractor.py through its -b option.
CHUNKSIZE := 100M
# External tools invoked by the recipes below.
GIT := git
# Interpreter for wikiextractor/WikiExtractor.py. The extraction recipe
# references PYTHON, so it has to be defined here; otherwise it expands to
# nothing and the script is executed directly.
# NOTE(review): python3 is assumed to suit the cloned WikiExtractor - confirm.
PYTHON := python3
STACK := stack
WGET := wget
# Language prefix of the wiki (as in "dewiki"); it selects the dump file, the
# extraction directory, the word list and the docs/ subdirectory.
WIKILANG := de
# Word statistics for the selected language. One wikiwc invocation is given
# both output paths (-k known.txt, -s known-subseq.txt), so only known.txt owns
# the recipe: naming both files on a single rule header would create two
# independent rules and let `make -j` start the same run twice.
# As the first rule in the file this is also the default goal.
docs/$(WIKILANG)/known.txt: extracted.$(WIKILANG)
	@mkdir -p $(@D)
	$(STACK) run wikiwc -- -w words.$(WIKILANG) -k $@ -s $(@D)/known-subseq.txt $</*/wiki_* +RTS -N

# known-subseq.txt is a by-product of the rule above. If it has gone missing,
# drop known.txt and rebuild it so that both files are regenerated together;
# when the file exists the recipe does nothing.
docs/$(WIKILANG)/known-subseq.txt: docs/$(WIKILANG)/known.txt
	@test -f $@ || rm -f $<
	@test -f $@ || $(MAKE) $<
# Remove the extracted articles, the downloaded dump and the wikiextractor
# checkout. Nothing under docs/ is deleted by this target.
# Declared phony so that a stray file named "clean" cannot turn it into a no-op.
.PHONY: clean
clean:
	rm -rf extracted.$(WIKILANG) $(WIKILANG)wiki-latest-pages-articles.xml.bz2
	rm -rf wikiextractor
# Run WikiExtractor.py on the downloaded dump ($<), writing its output into the
# directory extracted.$(WIKILANG) ($@); CHUNKSIZE is passed through -b.
# The wikiextractor checkout is an order-only prerequisite (after the `|`): it
# has to exist before the recipe runs, but a change of that directory's
# timestamp must not trigger a re-extraction of the whole dump.
# $(PYTHON) names the interpreter; if it is empty the script is run directly.
extracted.$(WIKILANG): $(WIKILANG)wiki-latest-pages-articles.xml.bz2 | wikiextractor
	$(PYTHON) wikiextractor/WikiExtractor.py -b$(CHUNKSIZE) -o $@ $<
# Fetch the WikiExtractor sources with git. The target is the checkout
# directory itself and has no prerequisites, so the clone only runs while that
# directory does not exist yet.
wikiextractor:
	$(GIT) clone https://github.com/attardi/wikiextractor $@
# Download the latest pages-articles dump for $(WIKILANG). The target name is
# also the remote file name, hence $@ at the end of the URL.
# The dump is fetched over HTTPS into a .part file and renamed only after wget
# has succeeded, so an aborted transfer never leaves a truncated archive that
# make would consider up to date.
$(WIKILANG)wiki-latest-pages-articles.xml.bz2:
	$(WGET) -O $@.part https://dumps.wikimedia.org/$(WIKILANG)wiki/latest/$@
	mv -f $@.part $@