# EMNLP 2013 experiments with tokenization ############

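# Usage:
#   make depend        # write the explicit dependency file `depend`
#   make -jNUM_JOBS    # run the experiments with NUM_JOBS parallel jobs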

# Absolute locations of the helper scripts and of the data directory.
# CURDIR is expanded by make itself, so both values are real paths.  The
# previous backtick form was only evaluated where a shell happened to see it;
# inside PATH it stayed a literal string, so the scripts directory was never
# actually searched.
SCRIPTS := $(CURDIR)/../src/scripts-srn
DATA := $(CURDIR)

#####################################################

# Recipes need bash: the %.fs rule uses process substitution.
SHELL := /bin/bash
# Let recipes call the helper scripts by bare name.
PATH := $(PATH):$(SCRIPTS)
export PATH


# Value passed to `wapiti train --nthread` in the %/model rule.
WAPITI_NTHREAD := 1

# DATASETS   = gmb twn
# SIZES      = 0.125 0.25 0.5 1.0
# FEATURES   = code5 code5-top10 code7 code7-top10 code9 code9-top10\
#              ngram5 ngram5-top10 ngram7 ngram7-top10

# Experiment grid: one model is trained and evaluated for every
# DATASETS x SIZES x FEATURES combination.
DATASETS   = english dutch italian
SIZES      = 1.0
# codecat0-top10, then codecatN and codecatN-top10 for each listed N.
FEATURES   = codecat0-top10 $(foreach n,1 3 5 7 9 11 13,codecat$(n) codecat$(n)-top10)


# Directory structure is:
# DATASET/
#   model.rnn
#   test.iob
#   test.lines
#   test.hidden.gz
#   test.fs
#   dev.iob
#   dev.lines
#   dev.hidden.gz
#   dev.fs
#   train.iob
#   train.lines
#   train.hidden.gz
#   train.fs
#   SIZE/
#     train.iob.abl
#     train.hidden.abl.gz
#     train.fs.abl
#     FEATURES/
#       pattern
#       test.predict
#       test.accuracy
#       dev.predict
#       dev.accuracy
#       model



# Build the dev and test .eval file of every DATASET/SIZE/FEATURES combination,
# then write the summary file `eval`: one line per combination and split,
# holding the last line of that .eval file followed by the dataset, size,
# feature set and split name.
eval: $(foreach d,$(DATASETS),\
        $(foreach s,$(SIZES),\
          $(foreach f,$(FEATURES),\
            $(addprefix $(d)/$(s)/$(f)/,dev.eval test.eval))))
	for ds in $(DATASETS); do \
	  for sz in $(SIZES); do \
	    for feat in $(FEATURES); do \
	      for split in dev test; do \
	        echo "$$(tail -n1 $$ds/$$sz/$$feat/$$split.eval) $$ds $$sz $$feat $$split"; \
	      done; \
	    done; \
	  done; \
	done > $@

# Create the DATASET/SIZE/FEATURES output directory of every combination.
# The directory list is generated by make; the shell only loops over it.
directories:
	for dir in $(foreach d,$(DATASETS),$(foreach s,$(SIZES),$(foreach f,$(FEATURES),$(d)/$(s)/$(f)))); do \
	  mkdir -p $$dir; \
	done

# DATASET/model.rnn is a symlink to emnlp-2013-rnn-models/DATASET.rnn under
# DATA.  `directories` is phony, so the link is re-created on every run.
%/model.rnn: directories
	ln -sf $(DATA)/emnlp-2013-rnn-models/$(*).rnn $@

# DATASET/vocab is a symlink to emnlp-2013-rnn-models/DATASET.vocab under DATA.
%/vocab: directories
	ln -sf $(DATA)/emnlp-2013-rnn-models/$(*).vocab $@

#%/model.rnn: %/train.big.lines %/valid.lines %/elman-params
#	elman -rnnlm $@ -train $*/train.big.lines -valid $*/valid.lines -rand-seed 1 \
#        `cat $*/elman-params` -anti-kasparek 10000000 -class 1 -debug 2 > $*/rnn.log 2>&1 

# %/vocab: %/train.big.iob
# 	cat $*/train.big.iob | conll2lines.py to | vocab.py make > $@

# DATASET/train.iob is a symlink to emnlp-2013-input/DATASET/train-1.0.
# NOTE(review): unlike dev/test the source is not named *.iob - presumably the
# full-size training split; confirm against the input directory.
%/train.iob: directories
	ln -sf $(DATA)/emnlp-2013-input/$(*)/train-1.0 $@

# DATASET/dev.iob is a symlink to emnlp-2013-input/DATASET/dev.iob under DATA.
%/dev.iob: directories
	ln -sf $(DATA)/emnlp-2013-input/$(*)/dev.iob $@

# DATASET/test.iob is a symlink to emnlp-2013-input/DATASET/test.iob under DATA.
%/test.iob: directories
	ln -sf $(DATA)/emnlp-2013-input/$(*)/test.iob $@


# The pattern file of a DATASET/SIZE/FEATURES directory is a symlink to
# SCRIPTS/FEATURES.wappat, where FEATURES is the last path component of the
# directory holding the target.
%/pattern: directories
	ln -sf $(SCRIPTS)/$(notdir $*).wappat $@

# X.eval is X.conlleval followed by X.accuracy.
%.eval: %.conlleval %.accuracy
	cat $(addprefix $*.,conlleval accuracy) > $@

# Feed the predictions to `conlleval -r` on stdin and keep its report.
%.conlleval: %.predict
	conlleval -r < $< > $@

# Keep only the last two space-separated fields of every prediction line
# (reverse, take fields 1-2, reverse back) and pass them to accuracy.py.
%.accuracy: %.predict
	rev < $< | cut -d' ' -f1,2 | rev | accuracy.py > $@

# NOTE(review): presumably meant to tie this file to `depend`; it only has an
# effect if the file is actually named `makefile` - confirm.
makefile: depend
# Write the file `depend`, which holds the explicit prerequisites that the
# pattern rules below do not declare themselves:
#   DATASET/{train.big,valid,train,dev,test}.lines  <- DATASET/vocab
#   DATASET/{train,dev,test}.hidden.gz              <- DATASET/model.rnn
#   DATASET/SIZE/train.{iob,hidden.gz,fs}.abl       <- DATASET/train.{iob,hidden.gz,fs}
#   DATASET/SIZE/FEATURES/model                     <- DATASET/SIZE/train.fs.abl DATASET/dev.fs
#   DATASET/SIZE/FEATURES/{dev,test}.predict        <- DATASET/{dev,test}.fs and the model
# (The first list used to say `tain.big`, which produced a rule for a file
# that no other rule refers to.)
depend:
	for d in $(DATASETS); do \
           for typ in train.big valid train dev test; do\
                echo "$$d/$$typ.lines: $$d/vocab";\
           done;\
           for typ in train dev test; do\
                echo "$$d/$$typ.hidden.gz: $$d/model.rnn";\
           done;\
           for s in $(SIZES); do\
             for typ in iob hidden.gz fs; do\
                 echo "$$d/$$s/train.$$typ.abl: $$d/train.$$typ";\
             done;\
             for f in $(FEATURES); do\
               echo "$$d/$$s/$$f/model: $$d/$$s/train.fs.abl $$d/dev.fs";\
               for file in dev test; do\
                  echo "$$d/$$s/$$f/$$file.predict: $$d/$$file.fs $$d/$$s/$$f/model";\
               done;\
             done;\
           done;\
        done > $@

# Read the generated prerequisites; the leading `-` tolerates a missing file.
-include depend

# Label SPLIT.fs (two directories above the target) with the model that sits
# next to the target, converting tabs in wapiti's output to spaces.
# The prerequisites come from the generated `depend` file.
%.predict:
	wapiti label --model $(@D)/model $(@D)/../../$(*F).fs \
	  | tr '\t' ' ' > $@

# Train a wapiti model from the pattern file in the same directory, using
# train.fs.abl one directory up as training data and dev.fs two directories up
# as development data.  All wapiti output goes to wapiti.log next to the model.
# The data prerequisites come from the generated `depend` file.
%/model: %/pattern
	wapiti train --pattern $(@D)/pattern $(@D)/../train.fs.abl $@ \
	  --stopwin 20 --nthread $(WAPITI_NTHREAD) \
	  --devel $(@D)/../../dev.fs > $(@D)/wapiti.log 2>&1

# X.abl is produced by ablate.py from the file X one directory up.  The first
# argument is the name of the directory holding the target (the SIZE component
# of the path).  The prerequisite comes from the generated `depend` file.
%.abl:
	ablate.py $(lastword $(subst /, ,$(abspath $(@D)))) $(@D)/../$(*F) > $@

# Run elman with the model.rnn from the target's directory over X.lines with
# -print-hidden and store the gzip-compressed output.  OMP_NUM_THREADS is set
# to 1 for the recipe.  The model.rnn prerequisite comes from `depend`.
%.hidden.gz: %.lines
	export OMP_NUM_THREADS=1 && \
	  elman -rnnlm $(@D)/model.rnn -test $*.lines -print-hidden | gzip > $@

# Pipe X.iob through `conll2lines.py to` and then `vocab.py replace`, which is
# given the stem X and the vocab file from the same directory.
# The vocab prerequisite comes from the generated `depend` file.
%.lines: %.iob
	conll2lines.py to < $*.iob | vocab.py replace $* $(@D)/vocab > $@


# Assemble the feature file by pasting, space-separated and line by line:
#   1. the first field of X.iob
#   2. X.unicat
#   3. the output of `discretize.py 20 0.5` over the uncompressed X.hidden.gz
#   4. the second field of X.iob
# Uses bash process substitution.
%.fs: %.iob %.unicat %.hidden.gz
	paste -d' ' \
	  <(cut -d' ' -f1 $*.iob) \
	  $*.unicat \
	  <(zcat $*.hidden.gz | discretize.py 20 0.5) \
	  <(cut -d' ' -f2 $*.iob) \
	  > $@
# X.unicat is the output of unicat.py reading X.iob on stdin.
%.unicat: %.iob
	unicat.py < $< > $@

# Remove the dataset directories and everything generated inside them.  The
# top-level `eval` and `depend` files are left alone.  Declared phony so that
# a stray file named `clean` cannot make the target look up to date.
.PHONY: clean
clean:
	rm -rf $(DATASETS)

# Keep these files when make would otherwise delete them as intermediates of
# a rule chain (or on interrupt), grouped by pipeline stage.
# Linked inputs:
.PRECIOUS: %/model.rnn %/train.iob %/dev.iob %/test.iob
# Derived data:
.PRECIOUS: %.lines %.hidden.gz %.fs %.abl
# Training and evaluation outputs:
.PRECIOUS: %/pattern %/model %.predict %.eval

# `directories` names an action, not a file.
.PHONY: directories
