
## Settings for this build variant.  Apart from MODELDIR, none of these are
## referenced by the rules visible in this file; presumably they are consumed
## by swbd.mk (included at the end of this section) -- confirm there.
PARSER = hhmmparser-sr

PARSERMODEL = QF.elided.swbd.model
POSMODEL = QF.swbd.model
TREES2DAT = trees2dat-sr.rb
## Directory that holds the intermediate and generated model files; every
## $(MODELDIR)/... target and prerequisite in the rules below lives under it.
MODELDIR = genmodel

## ST1: flatten nested EDITED (q) and `fix' edited (E) (i.e. coordinate EDITED
## with alteration).
## NOTE(review): the comment names the second step (E) but the value below uses
## a lowercase `e' -- confirm which spelling the consumer expects.
ST1 = qe

## ST2: fix edited again.  After the first fix, treebinarize runs, which
## creates some new opportunities for coordinating (especially with respect
## to S).
ST2 = e

## Shared definitions and rules.  The settings above are assigned before this
## include so that they are already set when swbd.mk is read.
include swbd.mk

############## 
# Steps for building completion model for unf phrases
###################

## Build unfs.txt: concatenate the 2 and 3 sedtrees files, run them through
## generateUnfs.rb, and keep only the output lines that mention neither
## "EDITED" nor "UNF".  The tree inputs are picked out of the prerequisite
## list so the file names are written only once.
$(MODELDIR)/unfs.txt: $(MODELDIR)/2.swbd.sedtrees $(MODELDIR)/3.swbd.sedtrees $(SCRIPTSDIR)/generateUnfs.rb
	cat $(filter %.swbd.sedtrees,$^) \
	  | ruby $(SCRIPTSDIR)/generateUnfs.rb \
	  | grep -v -e "EDITED" -e "UNF" > $@

## Build elisioncounts.txt: buildElisionModel.rb reads unfs.txt followed by the
## 2 and 3 sedtrees files on stdin; output lines mentioning "EDITED" or "UNF"
## are dropped.  $< is unfs.txt (the first prerequisite); the tree files are
## filtered out of the prerequisite list instead of being re-globbed.
$(MODELDIR)/elisioncounts.txt: $(MODELDIR)/unfs.txt $(MODELDIR)/2.swbd.sedtrees $(MODELDIR)/3.swbd.sedtrees $(SCRIPTSDIR)/buildElisionModel.rb
	cat $< $(filter %.swbd.sedtrees,$^) \
	  | ruby $(SCRIPTSDIR)/buildElisionModel.rb \
	  | grep -v -e "EDITED" -e "UNF" > $@

## Build elisionmodel.txt: pipe the elision counts through relfreq.pl and then
## limitProb.rb, and sort the result.
## limitProb.rb is listed as a prerequisite so that editing it triggers a
## rebuild, and it is located through $(SCRIPTSDIR) like the other scripts.
## NOTE(review): this assumes $(SCRIPTSDIR) names the same scripts/ directory
## that the recipe used to hard-code -- confirm in swbd.mk.
$(MODELDIR)/elisionmodel.txt: $(MODELDIR)/elisioncounts.txt $(SCRIPTSDIR)/relfreq.pl $(SCRIPTSDIR)/limitProb.rb
	perl $(SCRIPTSDIR)/relfreq.pl < $< | ruby $(SCRIPTSDIR)/limitProb.rb | sort > $@

## Build <stem>.swbd.elidedtrees: addElided.rb reads the elision model ($<)
## followed by the matching <stem>.swbd.sedtrees on stdin.  Before that, the
## perl filter removes a "UNF" that directly follows a token starting with
## "arg" (the filter sees the model lines as well as the trees, because both
## files are concatenated first).
## The recipe runs the script from $(SCRIPTSDIR), i.e. the same path that is
## declared as a prerequisite, so the file whose timestamp make checks is the
## file that is actually executed.
$(MODELDIR)/%.swbd.elidedtrees: $(MODELDIR)/elisionmodel.txt $(MODELDIR)/%.swbd.sedtrees $(SCRIPTSDIR)/addElided.rb
	cat $< $(MODELDIR)/$*.swbd.sedtrees | perl -p -e 's/(arg[^ ]+)UNF/\1/g' | ruby $(SCRIPTSDIR)/addElided.rb > $@

## Build <stem>.swbd.elidedrctrees from <stem>.swbd.elidedtrees by piping it
## through, in order: buildModel.rb -t, trees2cnftrees.pl,
## cnftrees2flattrees.pl and flattrees2rctrees.pl.
## The four scripts are declared as prerequisites (after the tree file, so $<
## is unchanged) so that editing any of them triggers a rebuild, matching the
## rules above that already list their scripts.
$(MODELDIR)/%.swbd.elidedrctrees: $(MODELDIR)/%.swbd.elidedtrees $(SCRIPTSDIR)/buildModel.rb $(SCRIPTSDIR)/trees2cnftrees.pl $(SCRIPTSDIR)/cnftrees2flattrees.pl $(SCRIPTSDIR)/flattrees2rctrees.pl
	ruby $(SCRIPTSDIR)/buildModel.rb -t < $< \
	  | perl $(SCRIPTSDIR)/trees2cnftrees.pl \
	  | perl $(SCRIPTSDIR)/cnftrees2flattrees.pl \
	  | perl $(SCRIPTSDIR)/flattrees2rctrees.pl > $@

## Build <stem>.swbd.fixedrctrees: run the elided rctrees through
## removeElided.rb, then keep only the part of each line that precedes its
## last ':' (lines without a ':' pass through unchanged).
## removeElided.rb is declared as a prerequisite (after the tree file, so $<
## is unchanged) so that editing it triggers a rebuild.
$(MODELDIR)/%.swbd.fixedrctrees: $(MODELDIR)/%.swbd.elidedrctrees $(SCRIPTSDIR)/removeElided.rb
	ruby $(SCRIPTSDIR)/removeElided.rb < $< | perl -p -e 's/(.*):.*/\1/g' > $@

## Build the training file: concatenate the 2 and 3 fixedrctrees files and
## rewrite them with six in-place perl passes, in this order:
##   1. turn every '[' into '(' and every ']' into ')';
##   2. delete a '-' plus the following run of characters that are not space,
##      parenthesis or '/', when that run is directly followed by '/' (the
##      '/' itself is kept);
##   3. rewrite TAG...#word as "(TAG word)", where TAG is the leading run of
##      upper-case letters and '$' and anything else before the '#' is dropped;
##   4. collapse "(X (X word))" into "(X word)";
##   5. replace every '_' with the literal text UNDERSCORE;
##   6. wrap each line in "(S1 ...)".
## All work is done on $@.tmp and the result is renamed onto the target only
## after every pass has succeeded.  Previously the passes edited the target
## itself, so a failure or interrupt part-way left a half-transformed file that
## make treated as up to date.  The passes are deliberately kept as separate
## whole-file runs rather than merged into one program: some character classes
## (e.g. [^ \)]+) can match the end-of-line character, so the line boundaries
## seen by a later pass may depend on an earlier pass having been written out.
stanford-parser/train.rctrees: $(MODELDIR)/2.swbd.fixedrctrees $(MODELDIR)/3.swbd.fixedrctrees
	cat $^ > $@.tmp
	perl -pi -e 's/\[/(/g;s/\]/)/g' $@.tmp
	perl -pi -e 's/-[^ ()\/]*\//\//g' $@.tmp
	perl -pi -e 's/([A-Z\$$]+)[^ ]*\#([^ \)]+)/(\1 \2)/g' $@.tmp
	perl -pi -e 's/\(([^ ]+) \(\1 ([^ )(]+)\)\)/(\1 \2)/g' $@.tmp
	perl -pi -e 's/_/UNDERSCORE/g' $@.tmp
	perl -pi -e 's/(.*)/(S1 \1)/' $@.tmp
	mv -f $@.tmp $@

## Train the model: run LexicalizedParser from stanford-parser.jar with
## "-train $< 0" on the training trees and save the result to the target via
## -saveToSerializedFile.  The command is one shell line; the backslashes only
## split it for readability.
stanford-parser/elided.model.gz: stanford-parser/train.rctrees
	java -server -mx2g -cp stanford-parser/stanford-parser.jar \
	  edu.stanford.nlp.parser.lexparser.LexicalizedParser \
	  -verbose -PCFG -vMarkov 1 -uwm 0 \
	  -headFinder edu.stanford.nlp.trees.LeftHeadFinder \
	  -train $< 0 \
	  -saveToSerializedFile $@

## Convert the bracketed dev input into "word/TAG" tokens, one sed expression
## per step, applied in order to every line:
##   1. rewrite each innermost "(TAG... word)" as "(TAG word/TAG)", where TAG
##      is the leading run of upper-case letters and '$' of the label;
##   2. delete every '(' together with the non-space run that follows it;
##   3. delete every ')';
##   4. delete any run of characters other than space and '/' that ends in '#'.
stanford-parser/sp.swbd.dev.in: sp.swbd.dev.in
	cat $^ | sed \
	  -e 's/(\([A-Z\$$]*\)[^ ]* \([^()]*\))/(\1 \2\/\1)/g' \
	  -e 's/([^ ]*//g' \
	  -e 's/)//g' \
	  -e 's/[^ \/]*\#//g' \
	  > $@

## Parse the dev input with the trained model.  $^ supplies the parser's two
## trailing arguments, so the prerequisite order (model first, input second)
## is significant -- do not reorder or extend the prerequisite list without
## adjusting the recipe.  Output lines containing "SENTENCE_SKIPPED" or
## "Sentence skipped" are removed before the result is written.
stanford-parser/sp.swbd.dev.out: stanford-parser/elided.model.gz stanford-parser/sp.swbd.dev.in
	java -server -mx2g -cp stanford-parser/stanford-parser.jar \
	  edu.stanford.nlp.parser.lexparser.LexicalizedParser \
	  -PCFG -vMarkov 1 -uwm 0 \
	  -headFinder edu.stanford.nlp.trees.LeftHeadFinder \
	  -tokenized -tagSeparator / -sentences newline -outputFormat oneline \
	  $^ \
	  | grep -v -e "SENTENCE_SKIPPED" -e "Sentence skipped" > $@
