### VARIABLES
# Every recipe runs under bash with -x, so each shell command is traced as it
# executes.  bash (rather than plain sh) is needed by the recipes themselves:
# the feature-extraction rule uses process substitution, <( ... ).
SHELL=/bin/bash -x

### DIRECTORY PATHS
# All paths are relative to the directory make is invoked from.
# SCRIPTS         directory holding the helper scripts listed further down
# GENERATED       output root used by the TwNC extraction rules
#                 NOTE(review): the feature/model/label/eval rules spell out
#                 "generated/" literally instead of using this variable.
# TWNC            TwNC-0.2 data directory; $(TWNC)/%.xml feeds the .raw rule
# TWNC_TOKENIZED  tokenized TwNC data; $(TWNC_TOKENIZED)/%.sents.gz feeds the
#                 .tok rule
SCRIPTS=src/scripts
GENERATED=generated
TWNC=data/ext/TwNC-0.2
TWNC_TOKENIZED=$(TWNC)/tokenized

### SCRIPT PATHS
# IOBIFY is a binary built by this Makefile from go/src/iobify/iobify.go; the
# remaining entries are scripts under $(SCRIPTS) that the recipes execute
# directly as filters (stdin -> stdout), so they must be executable.
IOBIFY=go/bin/iobify
PARAGRAPHY_TWNC_RAW=$(SCRIPTS)/make_twnc_data_raw.py
PARAGRAPHY_TWNC_TOK=$(SCRIPTS)/make_twnc_data_tok.py
CHARCAT=$(SCRIPTS)/charcat.py
EVAL=$(SCRIPTS)/eval.py
WAPEXP=$(SCRIPTS)/wapexp.py
# Command for timing wapiti runs.  The timing variants of the training and
# labeling recipes invoke it as "$(TIME) $@.time wapiti ...", which only makes
# sense with the /usr/bin/time -o form below (-o names the file the timings
# are written to); with the plain "time" value, $@.time would be taken as the
# command to run.
TIME=time
#TIME=/usr/bin/time -o

### EXTRACTING FEATURES

# Build a feature file from an IOB file.  Each output line has three
# space-separated fields:
#   1. column 1 of the input line, unchanged,
#   2. the output of $(CHARCAT) when fed the stream of column-1 values,
#   3. column 2 of the input line.
# (Presumably column 1 is the character and column 2 its IOB label -- confirm
# against the data files.)  Process substitution, <( ... ), requires bash.
generated/%.feat : data/%.iob $(CHARCAT)
	# The output directory is not guaranteed to exist (fresh checkout, or a
	# stem that contains a subdirectory), so create it first.
	mkdir -p $(@D)
	paste -d ' ' <(cut -d ' ' -f 1 $<) <(cut -d ' ' -f 1 $< | $(CHARCAT)) <(cut -d ' ' -f 2 $<) > $@

### GENERATING PATTERN FILES

# Expand a pattern template (.waptmp) into a wapiti pattern file (.wappat) by
# filtering it through $(WAPEXP); the result is rebuilt whenever the template
# or the script itself changes.
patterns/%.wappat : patterns/%.waptmp $(WAPEXP)
	$(WAPEXP) < $< > $@

### TRAINING MODELS
### Model file names have the form <corpus>.train.<pattern>.model

# Inputs of a training run, derived from the file part of the target name
# <corpus>.train.<pattern>.model:
#   MODEL_DEV_FEAT    generated/<corpus>.dev.feat     development set (--devel)
#   MODEL_TRAIN_FEAT  generated/<corpus>.train.feat   training set
#   MODEL_WAPPAT      patterns/<pattern>.wappat       wapiti pattern file (-p)
# Each variable expands to a shell command substitution that refers to $@, so
# it is only meaningful inside a recipe.  Note that the directory part of the
# target is discarded before the name is taken apart.
MODEL_DEV_FEAT = generated/$$(echo $(@F) | sed -e 's/\(.*\)\..*\..*\..*/\1/').dev.feat
MODEL_TRAIN_FEAT = generated/$$(echo $(@F) | sed -e 's/\(.*\..*\)\..*\..*/\1/').feat
MODEL_WAPPAT = patterns/$$(echo $(@F) | sed -e 's/.*\..*\.\(.*\)\..*/\1/').wappat

# Train a wapiti model.  The feature files cannot be written as ordinary
# prerequisites because their names are not a simple function of the stem, so
# they are brought up to date through recursive make.  Each step sits on its
# own recipe line: a failing sub-make aborts the rule before training starts,
# and under "make -n" only the $(MAKE) lines are actually executed.
# The pattern file is used as-is; it is not rebuilt here.
#
# To record timings, set TIME to "/usr/bin/time -o" and prefix the wapiti
# command with "$(TIME) $@.time".
generated/%.model :
	$(MAKE) $(MODEL_DEV_FEAT)
	$(MAKE) $(MODEL_TRAIN_FEAT)
	wapiti train -t 4 -p $(MODEL_WAPPAT) --devel $(MODEL_DEV_FEAT) --stopwin 20 $(MODEL_TRAIN_FEAT) $@

### LABELING
### Labeling result file names have the form <corpus>.(dev|test).<pattern>.labeled

# Inputs of a labeling run, derived from the file part of the target name
# <corpus>.(dev|test).<pattern>.labeled:
#   LABELED_MODEL  generated/<corpus>.train.<pattern>.model  (the first ".dev."
#                  or ".test." in the name is replaced by ".train.")
#   LABELED_FEAT   generated/<corpus>.(dev|test).feat
# Each variable expands to a shell command substitution that refers to $@, so
# it is only meaningful inside a recipe.  The "\|" alternation in the sed
# expression is a GNU sed extension.
LABELED_MODEL = generated/$$(echo $(@F) | sed -e 's/\.labeled$$/.model/' | sed -e 's/\.dev\.\|\.test\./.train./')
LABELED_FEAT = generated/$$(echo $(@F) | sed -e 's/\(.*\..*\)\..*\.labeled/\1.feat/')

# Label a feature file with the matching model and turn wapiti's
# tab-separated output into space-separated output.  The model and the
# feature file are brought up to date through recursive make, one step per
# recipe line, so a failing sub-make aborts the rule and "make -n" executes
# only the $(MAKE) lines.
#
# pipefail makes a wapiti failure fail the recipe (otherwise only sed's exit
# status would count), and the result is written to a temporary file first so
# that a failed or interrupted run never leaves a truncated target that looks
# up to date.
#
# To record timings, set TIME to "/usr/bin/time -o" and prefix the wapiti
# command with "$(TIME) $@.time".
generated/%.labeled :
	$(MAKE) $(LABELED_MODEL)
	$(MAKE) $(LABELED_FEAT)
	set -o pipefail; wapiti label -m $(LABELED_MODEL) $(LABELED_FEAT) | sed -e 's/\t/ /g' > $@.tmp && mv -f $@.tmp $@

### EVALUATION
### Evaluation result file names have the form <corpus>.(dev|test).<pattern>.eval

# Score a labeling result: the .labeled file is fed to $(EVAL) on stdin and
# whatever the script prints becomes the .eval file.  Re-run when either the
# labeling or the evaluation script changes.
generated/%.eval : generated/%.labeled $(EVAL)
	$(EVAL) < $< > $@

### EXTRACTING DATA FROM TWNC

# Build the iobify tool from its Go source.  GOPATH points at ./go, so
# "go install" run inside go/src/iobify is expected to drop the binary at
# go/bin/iobify, i.e. $(IOBIFY) (GOPATH-mode layout -- confirm for the Go
# version in use).  The steps are chained with && so that "go install" is
# never run from the wrong directory when the cd fails.
$(IOBIFY) : go/src/iobify/iobify.go
	export GOPATH=$$(pwd)/go && cd go/src/iobify && go install

# Combine the raw text ($<, first prerequisite) and the tokenized text
# (second prerequisite) of a TwNC document into an IOB file using iobify.
# iobify's stdout becomes the target; its stderr is kept next to it in
# <target>.log.
$(GENERATED)/twnc/%.iob : $(GENERATED)/twnc/%.raw $(GENERATED)/twnc/%.tok $(IOBIFY)
	$(IOBIFY) $< $(word 2,$^) > $@ 2> $@.log

# Produce the raw-text version of a TwNC document: the XML source is passed
# through tidy (quiet, XML mode, no line wrapping) and then through the
# raw-text extraction script.  The output directory is created on demand
# because the stem may contain subdirectories.
$(GENERATED)/twnc/%.raw : $(TWNC)/%.xml $(PARAGRAPHY_TWNC_RAW)
	mkdir -p $(@D)
	tidy -q -xml --wrap 0 < $< | $(PARAGRAPHY_TWNC_RAW) > $@

# Produce the tokenized version of a TwNC document: the gzipped sentence file
# is decompressed and piped through the tokenized-text extraction script.
# The output directory is created on demand because the stem may contain
# subdirectories.
$(GENERATED)/twnc/%.tok : $(TWNC_TOKENIZED)/%.sents.gz $(PARAGRAPHY_TWNC_TOK)
	mkdir -p $(@D)
	zcat $< | $(PARAGRAPHY_TWNC_TOK) > $@

### Extensions

include Makefile.bin

### ZIP file for web site

# Package the experiment sources for the web site.  A temporary symlink
# "elephant-experiments -> ." makes every archive member appear under an
# elephant-experiments/ prefix; it is removed again at the end.
# The rule has no prerequisites, so make only runs it when the zip file does
# not exist yet (or when forced, e.g. with -B).
elephant-experiments.zip :
	rm -f $@ elephant-experiments
	ln -s --force . elephant-experiments
	# Top-level directory entry, then entries for the (contentless in the
	# archive) generated and data/ext directories.
	zip $@ elephant-experiments
	zip $@ elephant-experiments/generated elephant-experiments/data/ext
	# Actual content, added recursively with symlinks stored as symlinks.
	# Everything after -x is an exclusion: Subversion metadata and three
	# build products under src/.
	e=elephant-experiments; zip $@ --symlinks -r \
		$$e/Makefile \
		$$e/Makefile.bin \
		$$e/README \
		$$e/data/english.iob \
		$$e/data/english.dev.iob \
		$$e/data/english.test.iob \
		$$e/data/english.train.iob \
		$$e/data/italian.iob \
		$$e/data/italian.dev.iob \
		$$e/data/italian.test.iob \
		$$e/data/italian.train.iob \
		$$e/experiments-srn/depend \
		$$e/experiments-srn/emnlp-2013-input \
		$$e/experiments-srn/emnlp-2013-rnn-models \
		$$e/experiments-srn/makefile \
		$$e/go/src \
		$$e/patterns \
		$$e/src \
		-x '*/.svn/*' \
		$$e/src/elman/rnnlmlib.o \
		$$e/src/elman/elman \
		$$e/src/wapiti/wapiti
	rm elephant-experiments

### PRECIOUS TARGETS

# Keep these files even when make builds them only as intermediate steps of a
# chain of pattern rules, and do not delete them when a build is interrupted.
# One declaration per pattern; .PRECIOUS prerequisites accumulate.
# NOTE(review): no rule producing generated/%.pattern is visible in this
# file -- presumably it lives in Makefile.bin; confirm or drop the entry.
.PRECIOUS : generated/%.pattern
.PRECIOUS : generated/%.model
.PRECIOUS : generated/%.labeled
.PRECIOUS : generated/%.eval
