################################################################################
#
#                       Headwords in the HHMMParser
#                  in wsjparse Makefile: include hdwd.mk
#
################################################################################

##############################
# Binaries and development
##############################

# Run the headword parser binary over testparser.sent with the QF and POS
# models; the parser's stdout is stored as the target.
# A former post-processing stage (| perl scripts/hypoths2trees.pl) is disabled.
dbnparser.out: testparser.sent bin/hhmmparser-hdwd genmodel/QF.wsj.model genmodel/POS.model
	bin/hhmmparser-hdwd genmodel/QF.wsj.model genmodel/POS.model < $< > $@
# Run the headword parser over testparser.sent with the LMod model plus the
# ".hd" variants of the QF and POS models; stdout is stored as the target.
# NOTE(review): the model order here is LMod, QF, POS, whereas the shard rules
# in this file pass QF, POS, LMod to runhhmmparser-hdwd -- confirm the binary's
# expected argument order.
# A former post-processing stage (| perl scripts/hypoths2trees.pl) is disabled.
hwmdparser.out: testparser.sent bin/hhmmparser-hdwd genmodel/LMod.model genmodel/QF.wsj.hd.model genmodel/POS.hd.model
	bin/hhmmparser-hdwd genmodel/LMod.model genmodel/QF.wsj.hd.model genmodel/POS.hd.model < $< > $@
# Rebuild bin/hhmmparser-hdwd with debug symbols (-g).
# This target names a command, not a file, so it is declared phony.
# The object file is placed BEFORE the -l options on the link line: the linker
# resolves libraries in command-line order, so libraries listed ahead of the
# objects that need them can be dropped (static libs, --as-needed).
.PHONY: hwmdparser.gdb
hwmdparser.gdb:
	g++ $(INCLUDES) -Wall -DNDEBUG -g -c src/hhmmparser-hdwd.cpp -o src/hhmmparser-hdwd.o
	g++ $(INCLUDES) -Wall -DNDEBUG -g src/hhmmparser-hdwd.o -L/sw/lib/ -lboost_thread -lm -o bin/hhmmparser-hdwd


##############################
# Pre-process treebank
##############################

# <stem>nphd.bintrees: feed <stem>.trees through treebinarize.pl with its -p flag.
%nphd.bintrees: %.trees scripts/treebinarize.pl
	perl scripts/treebinarize.pl -p < $< > $@
# <stem>puhd.bintrees: feed <stem>.trees through treebinarize.pl (no -p flag).
%puhd.bintrees: %.trees scripts/treebinarize.pl
	perl scripts/treebinarize.pl < $< > $@

# <stem>hd.ucnftrees from <stem>.sedtrees, one pipeline stage per line:
#   1. sed: rewrite each "(TAG word)" leaf as "(TAG TAG#word)"
#   2. trees2cnftrees.pl, then calcHdwdTree.rb
#   3. sed: where a "*"-suffixed label is followed by a token ending in "#",
#      append "*" to that token as well
#   4. sed: turn "( " into "[" and " )" into "]"
#   5. grep: drop empty lines
%hd.ucnftrees: %.sedtrees scripts/trees2cnftrees.pl scripts/calcHdwdTree.rb
	sed 's/(\([^ ]*\) *\([^()]*\) *)/(\1 \1\#\2)/g' $< \
	| perl scripts/trees2cnftrees.pl \
	| ruby scripts/calcHdwdTree.rb \
	| sed 's/(\([^ \(\)][^ \(\)]*\)\* \([^ \(\)][^ \(\)]*\)#/(\1* \2*#/g' \
	| sed 's/( /[/g;s/ )/]/g' \
	| grep -v '^$$' > $@

### %hd.rctrees, %hd.crctrees, should be taken care of in Makefile

# Keep the (expensive) tree and model files even when they are only
# intermediate steps of a chain.
.PRECIOUS: genmodel/all.%.cnftrees genmodel/HDMD.%.model
# HDMD model: calcHdwdCPT.rb output fed to relfreq-hdwd.pl with "-u 100 -d".
genmodel/HDMD.%.model: genmodel/all.%.cnftrees
	ruby scripts/calcHdwdCPT.rb < $< \
	| perl scripts/relfreq-hdwd.pl -u 100 -d > $@
# Disabled variant that took the -u value from the third dot-field of the target:
#	cat $^ | ruby scripts/calcHdwdCPT.rb | perl scripts/relfreq-hdwd.pl -u $(word 3,$(subst ., ,$@)) -d > $@
# Same pipeline as the plain HDMD model, but relfreq-hdwd.pl runs with "-u 5".
genmodel/HDMD.%.5del.model: genmodel/all.%.cnftrees
	ruby scripts/calcHdwdCPT.rb < $< \
	| perl scripts/relfreq-hdwd.pl -u 5 -d > $@
# LMod model: the matching HDMD model passed through calcLmod.pl.
genmodel/LMod.%.model: genmodel/HDMD.%.model
	perl scripts/calcLmod.pl < $< > $@



##############################
# Model builders
##############################

#genmodel/QF.hd.dat: $(HDCRCTRAINSET) genmodel/eos.tree scripts/trees2dat.rb
#	cat $(HDCRCTRAINSET) genmodel/eos.tree | grep -v '\^[4-9]' | sed 's/\^[1-3]//g' | ruby scripts/trees2dat-noplus.rb | grep '^Pw' > $@
##	echo 'F  2 END -/-|- : 1  = 1.00000000' >> $@
##	echo 'F  1 S END/END|END : 1 = 1.00000000' >> $@

#genmodel/POS.hd.model:
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	make genmodel/QF.hd.dat bin/postrainer scripts/relfreq.pl
#	cat genmodel/QF.hd.dat | grep '^Pw' | sed 's/^Pw *\([^ ]*\) *: *\([^ ]*\)/PwDT \1 : \2/' | bin/postrainer | grep -v "WARNING: distribution Y_prior: no values" > $@
#	cat genmodel/QF.hd.dat | grep '^Pw' | sed 's/^Pw *\([^ ]*\) *: *\([^ ]*\)/P : \2/' | perl scripts/relfreq.pl >> $@

#hhmm.qf.wsjnp.%.hd.hypoth.dat: hhmm.qf.wsjnp.%.gold.evalform
#	make genmodel/QF.wsj.hd.model
#	make genmodel/POS.hd.model # and genmodel/LMod.model
#	make bin/hhmmparser
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	cat $< | sed 's/([^ ]*//g;s/)//g;s/[^ \/]*\#//g' | nice bin/hhmmparser -b 500 genmodel/QF.wsj.hd.model genmodel/POS.hd.model genmodel/LMod.model > $@

.PRECIOUS: genmodel/md-hdwdlex.wsj%.model
# Headword-lexicalised model built from genmodel/all.<X>.dcnftrees, where <X>
# is the second dot-separated field of the target name.
# The prerequisite is written with $$, so it is only computed if the including
# Makefile enables .SECONDEXPANSION and defines $(comma) -- presumably it does;
# verify there.
# NOTE(review): the prerequisite names genmodel/<X>.dcnftrees while the recipe
# reads genmodel/all.<X>.dcnftrees -- confirm this difference is intended.
# Steps: warn and pause 5s; build the inputs with a sub-make; write the
# relative-frequency model; then append "W : <word>" frequencies derived from
# the model's own "Pw " lines.
# The sub-make is invoked as $(MAKE) (not a literal "make") so that the same
# make binary is used and flags such as -n are honoured.
genmodel/md-hdwdlex.%.model: genmodel/$$(word 1,$$(subst $$(comma), ,$$*)).dcnftrees
	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
	@sleep 5
	$(MAKE) genmodel/all.$(word 2,$(subst ., ,$@)).dcnftrees   bin/trees2hdwdlexsvs        scripts/relfreq.pl
	cat  genmodel/all.$(word 2,$(subst ., ,$@)).dcnftrees | bin/trees2hdwdlexsvs | grep -v '^Cp' | sed 's/^M L/M l/g;s/^M R/M r/g;s/^Gr : /Gr : l 1 /;s/^G : L/G : l/g;s/^G : R/G : r/g' | perl scripts/relfreq.pl | sort > $@ #| sed 's/ [h][^ :]*:/ h:/g;s/^L [h][^ ]*/L h/g' | perl scripts/relfreq-md.pl -f | sort | sed 's/^MdL/M l/g;s/^MdR/M r/g;s/^G : L/G : l/g;s/^G : R/G : r/g;s/^Gr  *:/Gr : l 1/g;s/ROOT{Unk}/ROOT{unk}/g' > $@
	cat $@ | grep '^Pw ' | sed 's/^Pw  *\([^ ]*\).*/W : \1/' | perl scripts/relfreq.pl | sort >> $@


##############################
# Test HHMM Parser
##############################

#hhmm.qf.wsjnphd.%.hypoth.dat: #separate make target allows me to include LMod.%.model
#	make hhmm.qf.wsjnphd.$*.gold.evalform
##	make genmodel/QF.$(word 3,$(subst ., ,$@)).model
##	make genmodel/POS.$(word 3,$(subst ., ,$@)).model
##	make genmodel/LMod.$(word 3,$(subst ., ,$@)).model
#	make bin/hhmmparser-hdwd
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!!'
#	@sleep 5
#	cat hhmm.qf.wsjnphd.$*.gold.evalform | sed 's/([^ ]*//g;s/)//g;s/[^ \/]*\#//g' | nice bin/hhmmparser-hdwd -b $(word 5,$(subst ., ,$@)) genmodel/QF.$(word 3,$(subst ., ,$@)).model genmodel/POS.$(word 3,$(subst ., ,$@)).model genmodel/LMod.$(word 3,$(subst ., ,$@)).model> $@

#hhmm.qf.wsjpuhd.%.hypoth.dat: 
#	make hhmm.qf.wsjpuhd.$*.gold.evalform
##	make genmodel/QF.$(word 3,$(subst ., ,$@)).model
##	make genmodel/POS.$(word 3,$(subst ., ,$@)).model
##	make genmodel/LMod.$(word 3,$(subst ., ,$@)).model
#	make bin/hhmmparser-hdwd
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	cat hhmm.qf.wsjpuhd.$*.gold.evalform | sed 's/([^ ]*//g;s/)//g;s/[^ \/]*\#//g' | nice bin/hhmmparser-hdwd -b $(word 5,$(subst ., ,$@)) genmodel/QF.$(word 3,$(subst ., ,$@)).model genmodel/POS.$(word 3,$(subst ., ,$@)).model genmodel/LMod.$(word 3,$(subst ., ,$@)).model> $@


### run stuff in parallel... for headwords only.
# log: hhmm.qf.wsj22np.firsttry.hd.eval => UNIFORM=.5
# One pattern rule per corpus shard N (1..8):
#   hhmm.qf.wsjnphd.<stem>.hypoth.datN  <-  hhmm.qf.wsjnphd.<stem>.gold.evalformN
# runhhmmparser-hdwd receives, in order: the per-run parser binary
# (bin/hhmmparser-hdwd-<4th dot-field of the target>), the shard's evalform,
# the 5th dot-field of the target, the QF/POS/LMod wsjnphd models, and the
# output file name.
# $(1) is the shard number; $$ defers expansion until the recipe runs.
define HDWD_SHARD_RULE
hhmm.qf.wsjnphd.%.hypoth.dat$(1): hhmm.qf.wsjnphd.%.gold.evalform$(1)
	runhhmmparser-hdwd bin/hhmmparser-hdwd-$$(word 4,$$(subst ., ,$$@)) $$< $$(word 5,$$(subst ., ,$$@)) genmodel/QF.wsjnphd.model genmodel/POS.wsjnphd.model genmodel/LMod.wsjnphd.model $$@
endef
$(foreach shard,1 2 3 4 5 6 7 8,$(eval $(call HDWD_SHARD_RULE,$(shard))))

### parallel eval: run with make -j4 or -j5
# Never auto-delete the combined hypothesis file or any of its eight shards.
.PRECIOUS: hhmm.qf.wsjnphd.%.hypoth.dat \
           hhmm.qf.wsjnphd.%.hypoth.dat1 hhmm.qf.wsjnphd.%.hypoth.dat2 \
           hhmm.qf.wsjnphd.%.hypoth.dat3 hhmm.qf.wsjnphd.%.hypoth.dat4 \
           hhmm.qf.wsjnphd.%.hypoth.dat5 hhmm.qf.wsjnphd.%.hypoth.dat6 \
           hhmm.qf.wsjnphd.%.hypoth.dat7 hhmm.qf.wsjnphd.%.hypoth.dat8

# Grouping targets: each pulls in four shards so they can be built together.
# The recipes only print a message; no file named *.para1 / *.para2 is written.
hhmm.qf.wsjnphd.%.hypoth.dat.para1: hhmm.qf.wsjnphd.%.hypoth.dat1 hhmm.qf.wsjnphd.%.hypoth.dat2 hhmm.qf.wsjnphd.%.hypoth.dat3 hhmm.qf.wsjnphd.%.hypoth.dat4
	@echo 'Running the first half of the corpus'
hhmm.qf.wsjnphd.%.hypoth.dat.para2: hhmm.qf.wsjnphd.%.hypoth.dat5 hhmm.qf.wsjnphd.%.hypoth.dat6 hhmm.qf.wsjnphd.%.hypoth.dat7 hhmm.qf.wsjnphd.%.hypoth.dat8
	@echo 'Running the second half of the corpus'

# Combined hypothesis file for the whole corpus, built from eight shards.
#   1. Copy the parser binary to bin/hhmmparser-hdwd-<4th dot-field of $@>,
#      the name the shard rules invoke.
#   2. Sub-make shards 1-4 (.para1), then shards 5-8 (.para2), each with
#      $(PARA) (presumably a -jN flag set by the caller -- verify).
#   3. Concatenate the shards in order.
# The sub-makes use $(MAKE) rather than a literal "make" so the same make
# binary is used and flags such as -n are honoured.
# The result is assembled in $@.tmp and only renamed to $@ once all eight
# shards are in. This target is listed in .PRECIOUS, so writing $@ directly
# would leave a half-complete file that looks up to date if the second half
# failed.
hhmm.qf.wsjnphd.%.hypoth.dat: bin/hhmmparser-hdwd hhmm.qf.wsjnphd.%.gold.evalform
	cp bin/hhmmparser-hdwd bin/hhmmparser-hdwd-$(word 4,$(subst ., ,$@))
	$(MAKE) $(PARA) hhmm.qf.wsjnphd.$*.hypoth.dat.para1
	@echo 'Trying to put together .dat1 .dat2 .dat3 .dat4 > .dat'
	@sleep 10
	cat $(addprefix $@,1 2 3 4) > $@.tmp
	$(MAKE) $(PARA) hhmm.qf.wsjnphd.$*.hypoth.dat.para2
	@echo 'Trying to put together .dat5 .dat6 .dat7 .dat8 >> .dat'
	@sleep 10
	cat $(addprefix $@,5 6 7 8) >> $@.tmp
	mv -f $@.tmp $@

# Convert the combined hypothesis file to CNF trees: strip every {...} span
# and every "*", then run
# hypoths2rctrees.pl -> rctrees2flattrees.pl -> flattrees2cnftrees.pl.
# (A former first step, "make $(PARA) hhmm.qf.wsjnphd.$*.hypoth.dat.para", is disabled.)
hhmm.qf.wsjnphd.%.hypoth.cnftrees: hhmm.qf.wsjnphd.%.hypoth.dat scripts/hypoths2rctrees.pl scripts/rctrees2flattrees.pl scripts/flattrees2cnftrees.pl scripts/unbinarize.pl
	sed 's/{[^}]*}//g' $< \
	| sed 's/\*//g' \
	| perl scripts/hypoths2rctrees.pl \
	| perl scripts/rctrees2flattrees.pl \
	| perl scripts/flattrees2cnftrees.pl > $@

# to step through the trees and see how they look, compare these two.
# Pipe the hypothesis trees (hypoths2rctrees.pl -e) into the viewtree script,
# launched in the background. Nothing is written to the target file; the
# "> $@" redirect and a separate viewer invocation on $@ are both disabled.
%.hypoth.rctrees.display: %.hypoth.dat
	perl scripts/hypoths2rctrees.pl -e < $< | /project/nlp/swu/snowbank/scripts/viewtree &
# Gold-tree counterpart of the hypothesis display rule: push <stem>.gold.evalform
# through binarization (treebinarize.pl -p), propArgs.rb, treesed.pl, the
# "(TAG word)" -> "(TAG TAG#word)" rewrite, trees2cnftrees.pl,
# lowercasepreterm.rb, a paren-to-bracket rewrite, cnftrees2flattrees.pl and
# flattrees2rctrees.pl, then hand the result to the viewtree script in the
# background. Nothing is written to the target file (the redirect to
# $*.gold.rctrees is commented out).
%.gold.rctrees.display: %.gold.evalform
	cat $< | perl scripts/treebinarize.pl -p | ruby scripts/propArgs.rb | sed 's/( \([^ ]\)/(\1/g;s/ *)/)/g' | perl scripts/treesed.pl | sed 's/(\([^ ]*\) *\([^()]*\) *)/(\1 \1\#\2)/g' | perl scripts/trees2cnftrees.pl | ruby scripts/lowercasepreterm.rb | sed 's/( */[/g;s/ *)/]/g' | perl scripts/cnftrees2flattrees.pl | perl scripts/flattrees2rctrees.pl | /project/nlp/swu/snowbank/scripts/viewtree & #> $*.gold.rctrees
#	make genmodel/$*.gold.rctrees
#	cat $*.gold.rctrees | /project/nlp/swu/snowbank/scripts/viewtree &

##############################
# Eval
##############################
# Keep the gold evalform and all eight of its shards (the hypothesis shards
# .dat1-.dat8 each depend on the matching evalformN).
.PRECIOUS: %.gold.evalform \
           %.gold.evalform1 %.gold.evalform2 %.gold.evalform3 %.gold.evalform4 \
           %.gold.evalform5 %.gold.evalform6 %.gold.evalform7 %.gold.evalform8
### split up evalforms for parallel evaluation.
# With N = number of lines in <stem>.gold.evalform and i = N/8 (integer
# division), shard k (1..7) holds lines (k-1)*i+1 .. k*i and shard 8 holds the
# remaining N - 7*i lines.
# The line count is read with "wc -l < file" so that only the number is
# printed; parsing "wc -l file" output breaks when wc pads the count with
# leading spaces. Steps are chained with && so a failed count aborts the
# recipe before the target is written.
%.gold.evalform1: %.gold.evalform
	i=$$(( $$(wc -l < $<) / 8 )) && \
	head -n $$i $< > $@
%.gold.evalform2: %.gold.evalform
	i=$$(( $$(wc -l < $<) / 8 )) && \
	head -n $$((2*$$i)) $< | tail -n $$i > $@
%.gold.evalform3: %.gold.evalform
	i=$$(( $$(wc -l < $<) / 8 )) && \
	head -n $$((3*$$i)) $< | tail -n $$i > $@
%.gold.evalform4: %.gold.evalform
	i=$$(( $$(wc -l < $<) / 8 )) && \
	head -n $$((4*$$i)) $< | tail -n $$i > $@
%.gold.evalform5: %.gold.evalform
	i=$$(( $$(wc -l < $<) / 8 )) && \
	head -n $$((5*$$i)) $< | tail -n $$i > $@
%.gold.evalform6: %.gold.evalform
	i=$$(( $$(wc -l < $<) / 8 )) && \
	head -n $$((6*$$i)) $< | tail -n $$i > $@
%.gold.evalform7: %.gold.evalform
	i=$$(( $$(wc -l < $<) / 8 )) && \
	head -n $$((7*$$i)) $< | tail -n $$i > $@
# Last shard: everything after the first seven shards.
%.gold.evalform8: %.gold.evalform
	j=$$(( $$(wc -l < $<) )) && \
	i=$$(( $$j / 8 )) && \
	tail -n $$(( $$j - (7*$$i) )) $< > $@

#near-exact copy of .evalform rule, to avoid using twice for gold and hypoth
# Gold evalform from gold CNF trees:
#   1. sed: delete every "<token>#" prefix (non-space, non-slash chars up to '#')
#   2. unbinarize.pl
#   3. perl: reduce a label made of capitals followed by lowercase / '-' / '$'
#      characters to just its leading capitals
%.gold.evalform: %.gold.cnftrees scripts/hypoths2rctrees.pl scripts/rctrees2flattrees.pl scripts/flattrees2cnftrees.pl scripts/unbinarize.pl
	sed 's/[^ \/]*\#//g' $< \
	| perl scripts/unbinarize.pl \
	| perl -p -e 's/([A-Z]+)[-a-z\$$]+[-a-zA-Z\$$]*/\1/g' > $@

# Small gold set: the first 10 lines of genmodel/wsj22np.cnftrees.
%.wsj22np10.gold.cnftrees: genmodel/wsj22np.cnftrees
	head -n 10 $< > $@
# Smaller gold set: the first 5 lines of genmodel/wsj22np.cnftrees.
%.wsj22np05.gold.cnftrees: genmodel/wsj22np.cnftrees
	head -n 5 $< > $@


# Score hypothesis shards that already exist on disk: concatenate shards 1-4
# into <stem>.hypoth.dat, convert to CNF trees, build <stem>.hypoth.evalform
# with a sub-make, then run evalb into <stem>.eval.
# No file named <stem>.eval.para is written, so this recipe runs every time it
# is requested.
# NOTE(review): only .dat1-.dat4 are concatenated although the corpus is split
# into eight shards elsewhere in this file -- confirm whether .dat5-.dat8
# should be included.
# The sub-make uses $(MAKE) rather than a literal "make" so the same make
# binary is used and flags such as -n are honoured.
%.eval.para: bin/evalb srcmodel/new.prm %.gold.evalform
	cat $*.hypoth.dat1 $*.hypoth.dat2 $*.hypoth.dat3 $*.hypoth.dat4 > $*.hypoth.dat
	cat $*.hypoth.dat | sed 's/{[^}]*}//g' | sed 's/\*//g' | perl scripts/hypoths2rctrees.pl | perl scripts/rctrees2flattrees.pl | perl scripts/flattrees2cnftrees.pl > $*.hypoth.cnftrees 
	$(MAKE) $*.hypoth.evalform
	bin/evalb -p srcmodel/new.prm $*.gold.evalform $*.hypoth.evalform > $*.eval


# Toy job 1 for the parallel-make experiment: write a one-line sentence file,
# wait 3s, announce completion, then run it through testperl.pl and sed into
# eraseme.1.
# Note: "1#" is passed to perl as a literal argument; a '#' in the middle of a
# shell word does not start a comment, so the sed stage does run.
test1.sent:
	echo '1 john loves mary' > $@
	sleep 3
	echo '1 done.'
	perl testperl.pl 1# < $@ | sed 's/ /./g' > eraseme.1
# Toy job 2 for the parallel-make experiment: write a one-line sentence file,
# wait 3s, announce completion, then run it through testperl.pl and sed into
# eraseme.2. ("2#" is a literal argument, not a shell comment.)
test2.sent:
	echo '2 joe loves mary' > $@
	sleep 3
	echo '2 done.'
	perl testperl.pl 2# < $@ | sed 's/ /./g' > eraseme.2
# Toy job 3 for the parallel-make experiment: write a one-line sentence file,
# wait 3s, announce completion, then run it through testperl.pl and sed into
# eraseme.3. ("3#" is a literal argument, not a shell comment.)
test3.sent:
	echo '3 john loves martha' > $@
	sleep 3
	echo '3 done.'
	perl testperl.pl 3# < $@ | sed 's/ /./g' > eraseme.3
# Toy job 4 for the parallel-make experiment: write a one-line sentence file,
# wait 3s, announce completion, then run it through testperl.pl and sed into
# eraseme.4. ("4#" is a literal argument, not a shell comment.)
test4.sent:
	echo '4 joe loves martha' > $@
	sleep 3
	echo '4 done.'
	perl testperl.pl 4# < $@ | sed 's/ /./g' > eraseme.4

# Aggregate target for the four toy jobs. It has no recipe and produces no
# file, so it is declared phony; a stray file called "testperl" can then never
# mask it.
.PHONY: testperl
testperl: test1.sent test2.sent test3.sent test4.sent

#SHELL=./myshell
# Parallel-make experiment: build the four toy sentence files through a
# sub-make run with $(PARA) (presumably a -jN flag set by the caller --
# verify), then concatenate them into the target.
# The sub-make uses $(MAKE) rather than a literal "make" so the same make
# binary is used and flags such as -n are honoured.
dummytest:
	$(MAKE) $(PARA) testperl
	cat test1.sent test2.sent test3.sent test4.sent > $@
#	cat test1.sent | perl testperl.pl 1# | sed 's/ /./g' > eraseme.1
#	cat test2.sent | perl testperl.pl 2# | sed 's/ /./g' > eraseme.2
#	cat test3.sent | perl testperl.pl 3# | sed 's/ /./g' > eraseme.3
#	cat test4.sent | perl testperl.pl 4# | sed 's/ /./g' > eraseme.4
#	cat eraseme.1 eraseme.2 eraseme.3 eraseme.4 > eraseme.all




#################### STUFF I THINK IS OBSOLETE (PLEASE COMMENT BACK IN IF I'M WRONG!)


# End of sentence markers in a simple tree
#genmodel/eos.tree:
#	echo '[END [END END#eos]]' > $@
#genmodel/eos.tree.start:
#	echo '[START [START start#eos]]' > genmodel/eos.tree

# QF models have rich dependencies directly from corpus
#genmodel/QFP.%.tally: genmodel/all.%.crctrees scripts/trees2dat-noplus.rb
#	cat $< | grep -v '\^[5-9]' | sed 's/\^[0-9]//g' | ruby scripts/trees2dat-noplus.rb | perl -e 'while(<>){chop;$$T{$$_}++;} foreach $$t(keys %T){print "$$t = $$T{$$t}\n";}' > $@
##genmodel/QF.swbd.model: $(SWBDCRCTRAINSETPOS) $(MODELDIR)/eos.swbdcrctrees
##	cat $(SWBDCRCTRAINSETPOS) $(MODELDIR)/eos.swbdcrctrees > genmodel/model.input
##	make genmodel/QF.swbd.phony

# QF models have rich dependencies directly from corpus
#genmodel/QFP.%.tally: genmodel/all.%.crctrees scripts/trees2dat-noplus.rb
#	cat $< | grep -v '\^[5-9]' | sed 's/\^[0-9]//g' | ruby scripts/trees2dat-noplus.rb | perl -e 'while(<>){chop;$$T{$$_}++;} foreach $$t(keys %T){print "$$t = $$T{$$t}\n";}' > $@
###	echo 'Fr 1 S S/end|end : 1 = 1' >> $@
###	echo 'Fr 2 end -/-|- : 1  = 1' >> $@
#genmodel/QF.%.model:
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	make genmodel/QFP.$*.tally
#	cat genmodel/QFP.$*.tally | grep '^Pw ' | perl scripts/relfreq.pl -c 3 | sort > $@
#	cat genmodel/QFP.$*.tally | grep '^Pw ' | perl scripts/relfreq.pl -c 3 | perl -n -e 'if(s/^Pw ([^ ]+).*/W : \1 = 1.0/){ print }' | sort -u >> $@
#	cat genmodel/QFP.$*.tally | grep -v '^Pw ' | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl >> $@
#genmodel/QF.similaritynphd.model:
##	make genmodel/QF.wjsnphd.model
#	grep '^Pw' /scratch/nlp/swu/wsjparse/genmodel/QF.wsjnphd.model > $@
#	grep '^W' /scratch/nlp/swu/wsjparse/genmodel/QF.wsjnphd.model >> $@
#	make genmodel/QFP.similaritynphd.tally
#	echo 'Fr 1 S/end|end : 1 = 1' >> genmodel/QFP.similaritynphd.tally
##	echo 'Fr 2 end -/-|- : 1  = 1' >> genmodel/QFP.similaritynphd.tally
#	cat genmodel/QFP.similaritynphd.tally | grep -v '^Pw ' | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl >> $@

# CIQF model trained from PCFG, through RCPCFG
#.PRECIOUS: genmodel/pcfg.%.model
#genmodel/pcfg.%.model:
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	make genmodel/all.$*.cnftrees        scripts/trees2rules.pl        scripts/relfreq.pl
#	cat  genmodel/all.$*.cnftrees | perl scripts/trees2rules.pl | perl scripts/relfreq.pl -c $(COUNTCUTOFF) | sed 's/Pg /Pc /g' | sort > $@
#	cat  genmodel/all.$*.cnftrees | sed 's/^(\([^ ]*\).*$$/Gr : \1/' | perl scripts/relfreq.pl -c $(COUNTCUTOFF) | sort >> $@
#	cat $@ | grep '^Pw ' | sed 's/^Pw  *\([^ ]*\).*/W : \1/' | perl scripts/relfreq.pl | sort >> $@
#.PRECIOUS: genmodel/bpcfg.%.model
#genmodel/bpcfg.%.model:
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	make genmodel/pcfg.$*.model   bin/pcfg2bpcfg
#	cat  genmodel/pcfg.$*.model | bin/pcfg2bpcfg | sort > $@

#genmodel/sembpcfg.%.model: genmodel/bpcfg.%.model #deprecated
#	cat $< | sed 's/^GG \([^ ][^ ]*\)-\([lr]\)\([0-9]\) :/M \2 \3 \1 :/g;s/^Gr : \([^ ][^ ]*\)-\([lr]\)\([0-9]\) =/Gr : \2 \3 \1 =/g;s/\([^ ][^ ]*\)-[lr][0-9] /\1 /g' | ruby scripts/rules2lrules.rb | sed 's/^M\(.*\)\([^ ][^ ]*\) :/M\1\2{unk} :/g;s/^Gr : \(.*\) =/Gr : \1{unk} =/g'> $@

#.PRECIOUS: genmodel/mlpcfg.%.model
#genmodel/mlpcfg.%.model: #deprecated... to be restored?
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	make genmodel/all.$(word 2,$(subst ., ,$@)).cnftrees        scripts/trees2svsrules.pl        scripts/relfreq.pl
#        ## l={m+c,h} version
#	cat  genmodel/all.$(word 2,$(subst ., ,$@)).cnftrees | perl scripts/trees2svsrules.pl | sed 's/ h[^ :]*:/ h:/g;s/^L h[^ ]*/L h/g' > $@.dat 
#	grep -v '^L m ' $@.dat | perl scripts/relfreq-ml.pl -c $(COUNTCUTOFF) -r unk | sort > $@
#	grep '^L m' $@.dat | perl scripts/lmodelbackoff.pl -l 0.$(word 3,$(subst ., ,$@)) >> $@
#        ## l={m,h} version
##	cat  genmodel/all.$(word 2,$(subst ., ,$@)).cnftrees | perl scripts/trees2svsrules.pl -m | sed 's/ \([h]\)[^ :]*:/ \1:/g;s/^L \([h]\)[^ ]*/L \1/g' > $@.dat 
##	cat $@ | grep '^Pw ' | sed 's/^Pw  *\([^ ]*\).*/W : \1/' >> $@.dat #| perl scripts/relfreq.pl -c $(COUNTCUTOFF) | sort >> $@
#	cat $@.dat | perl scripts/relfreq-ml.pl -c $(COUNTCUTOFF) -r unk | sort > $@
#	rm -f $@.dat


## debugging/development
#genmodel/md.%.trivmodel: #for debugging
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	cat genmodel/md-rlnclust.$*.model.tmpdat | perl scripts/rlnclust-syn.pl -g | grep '^G ' > genmodel/md-rlnclust.$*.tmp
#	cat genmodel/md-rlnclust.$*.model genmodel/md-rlnclust.$*.tmp | perl scripts/trivialize.pl > $@
#	rm -f md-rlnclust.$*.tmp
#genmodel/vecmd-rlnclust.%.hdwdmodel: #for debugging
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	cat genmodel/md.$*.model > genmodel/md-rlnclust.$*.model
#	grep '^HW ' genmodel/md.$*.model | sed 's/^HW : \([^ ]*\) = .*/H \1 : \1 = 1/g' >> genmodel/md-rlnclust.$*.model
#	make genmodel/vecmd-rlnclust.$*.model
#	mv genmodel/vecmd-rlnclust.$*.model $@
#eraseme.%: #for debugging
#	pinch $(subst wsj,,$(word 5,$(subst ., ,$@))) wsj.sents | bin/ckyparser-$(word 2,$(subst ., ,$@)) -v genmodel/$(word 2,$(subst ., ,$@)).$(word 3,$(subst ., ,$@)).$(word 4,$(subst ., ,$@)).model genmodel/POS.wsjnp.model >& $@
#	grep 'pos match' $@ > eraseme.pos.$(word 2,$(subst ., ,$@)).$(word 3,$(subst ., ,$@)).$(word 4,$(subst ., ,$@)).$(word 5,$(subst ., ,$@))
#	grep 'binary match' $@ > eraseme.bin.$(word 2,$(subst ., ,$@)).$(word 3,$(subst ., ,$@)).$(word 4,$(subst ., ,$@)).$(word 5,$(subst ., ,$@))
# Quick evaluation of a model on quickeval.sents.
# With the target split on '.', field 1 selects the parser binary
# (bin/ckyparser-<field1>), fields 1-3 name the model file, and field 2 selects
# the POS model.
# Steps: parse (timed) into <stem>.hypoth.dat; convert with rmel to CNF trees;
# hard-link the shared gold trees under the stem's name; build <stem>.eval with
# a sub-make; rename it to the target; remove the hypothesis file.
# The sub-make uses $(MAKE) rather than a literal "make" so the same make
# binary is used and flags such as -n are honoured.
%.quickeval: genmodel/%.model
	cat quickeval.sents | time bin/ckyparser-$(word 1,$(subst ., ,$@)) genmodel/$(word 1,$(subst ., ,$@)).$(word 2,$(subst ., ,$@)).$(word 3,$(subst ., ,$@)).model genmodel/POS.$(word 2,$(subst ., ,$@)).model > $*.hypoth.dat
	rmel $*.hypoth.dat > $*.hypoth.cnftrees
	ln -f quickeval.gold.cnftrees $*.gold.cnftrees
	$(MAKE) $*.eval
	mv $*.eval $*.quickeval
	rm -f $*.hypoth.dat

#### {hhmmparser...,ckyparser...}.eval
# Score <stem>.hypoth.evalform against <stem>.gold.evalform with evalb, using
# the parameter file srcmodel/new.prm; evalb's stdout becomes the target.
# Prerequisite order: 1 = evalb binary, 2 = parameter file, 3 = gold, 4 = hypothesis.
%.eval: bin/evalb srcmodel/new.prm %.gold.evalform %.hypoth.evalform
	$< -p $(word 2,$^) $(word 3,$^) $(word 4,$^) > $@


## temporary tests to debug rlnclust-EM.pl, and deprecated models
#speedtest.%.tmp:
#	randlines $(word 2,$(subst ., ,$@)) genmodel/test.wsjnpc10hw.freqdat > $@
#speedtest.%:
#	make speedtest.$(word 3,$(subst ., ,$@)).tmp
#	cat speedtest.$(word 3,$(subst ., ,$@)).tmp | time perl scripts/rlnclust-EM.pl -e $(word 2,$(subst ., ,$@)) -i 5 -p 1 -r 1 > /dev/null
#.PRECIOUS: genmodel/sempcfg.%.model
#genmodel/sempcfg.%.dat: #deprecated
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	make genmodel/all.$*.cnftrees        scripts/trees2svsrules.pl        scripts/relfreq.pl
#	cat  genmodel/all.$*.cnftrees | perl scripts/trees2svsrules.pl | sort > $@
#	cat  genmodel/all.$*.cnftrees | sed 's/^(\([^ ]*\).*$$/Gr : \1/' | sort >> $@
#	cat $@ | grep '^Pw ' | sed 's/^Pw  *\([^ ]*\).*/W : \1/' | sort >> $@
#genmodel/sempcfg.%.model: genmodel/sempcfg.%.dat #deprecated
#	cat $< | perl scripts/relfreq-sem.pl -c $(COUNTCUTOFF) -r unk | sort > $@
##genmodel/RCPCFG.%.model:
##	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
##	@sleep 5
##	make genmodel/PCFG.$*.model   bin/pcfg2rcpcfg
##	cat  genmodel/PCFG.$*.model | bin/pcfg2rcpcfg | sort > $@
#genmodel/md-em.wsj%.model: #genmodel/md.%.model ## backoff-EM.pl script not working
#	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
#	@sleep 5
#	make genmodel/all.$(word 2,$(subst ., ,$@)).freqdat    genmodel/$(subst wsj,wsj$(subst  ,,$(WSJHELDOUTSECTS)),$(word 2,$(subst ., ,$@))).freqdat
#	sed 's/^/+/g' genmodel/$(subst wsj,wsj$(subst  ,,$(WSJHELDOUTSECTS)),$(word 2,$(subst ., ,$@))).freqdat > genmodel/$(subst wsj,wsj$(subst  ,,$(WSJHELDOUTSECTS)),$(word 2,$(subst ., ,$@))).heldfreqdat
#	cat genmodel/all.$(word 2,$(subst ., ,$@)).freqdat genmodel/$(subst wsj,wsj$(subst  ,,$(WSJHELDOUTSECTS)),$(word 2,$(subst ., ,$@))).heldfreqdat | perl scripts/relfreq-md.pl -c $(COUNTCUTOFF) -r Unk -f > $@.nolowfreqdat
#	grep -v '^\+' $@.nolowfreqdat > genmodel/all.$(word 2,$(subst ., ,$@)).nolowfreqdat
#	grep '^\+' $@.nolowfreqdat | sed 's/^\+//g' > genmodel/$(subst wsj,wsj$(subst  ,,$(WSJHELDOUTSECTS)),$(word 2,$(subst ., ,$@))).nolowfreqdat
##	grep -v '^L m ' $@.dat | perl scripts/relfreq-ml.pl -c $(COUNTCUTOFF) -r unk | sort > $@
##	grep '^L m' $@.dat | perl scripts/lmodelbackoff.pl -l 0.$(word 3,$(subst ., ,$@)) >> $@
##	cat $@.dat | perl scripts/relfreq-ml.pl -c $(COUNTCUTOFF) -r unk | sort > $@
#	cat genmodel/all.$(word 2,$(subst ., ,$@)).nolowfreqdat | perl scripts/backoff-EM.pl -p 10 -i 100 -o backoff.wsj$* genmodel/$(subst wsj,wsj$(WSJHELDOUTSECTS),$(word 2,$(subst ., ,$@))).nolowfreqdat | sed 's/^MdL/M l/g;s/^MdR/M r/g;s/^Gr  *:/Gr : l 1/g' > $@
##	cat genmodel/all.$(word 2,$(subst ., ,$@)).freqdat | perl scripts/backoff-EM.pl -p 10 -i 100 -o backoff.wsj$* genmodel/$(subst wsj,wsj$(WSJHELDOUTSECTS),$(word 2,$(subst ., ,$@))).freqdat | sed 's/^MdL/M l/g;s/^MdR/M r/g;s/^Gr  *:/Gr : l 1/g' > $@
##	rm -f $@.nolowfreqdat genmodel/$(subst wsj,wsj$(subst  ,,$(WSJHELDOUTSECTS)),$(word 2,$(subst ., ,$@))).heldfreqdat genmodel/$(subst wsj,wsj$(subst  ,,$(WSJHELDOUTSECTS)),$(word 2,$(subst ., ,$@))).nolowfreqdat genmodel/all.$(word 2,$(subst ., ,$@)).nolowfreqdat

#.PRECIOUS: genmodel/%.freqdat
#genmodel/%.freqdat: genmodel/%.dcnftrees scripts/trees2svsrules.pl
#	cat  genmodel/$*.dcnftrees | perl scripts/trees2svsrules.pl -d | sed 's/ h[^ :]*:/ h:/g;s/^L h[^ ]*/L h/g' | perl scripts/relfreq-md.pl -f | sort > $@

#.PRECIOUS: genmodel/efg.%.model
#genmodel/efg.%.model: genmodel/md.%.model scripts/ml2lstar.py

#.PRECIOUS: genmodel/lstar.%.model
#genmodel/lstar.%.model: genmodel/md.%.model scripts/ml2lstar.py
#	cat $< | python scripts/ml2lstar.py > $@
#	cat $< | egrep '^(Pw|Pc|LC|L) ' >> $@

#.PRECIOUS: genmodel/pw.%.model
#genmodel/pw.%.model:
#	make genmodel/all.$*.cnftrees                                                                                  scripts/relfreq.pl             scripts/sortbyprob.pl
#	cat  genmodel/all.$*.cnftrees | sed 's/([^ ]* //g;s/)//g;s/ /\n/g' | sed 's/\(.*\)\#\(.*\)/Pw \2 : \1/' | perl scripts/relfreq.pl -c 3 | perl scripts/sortbyprob.pl > $@
#	cat  $@ | perl -n -e 'if(s/^Pw ([^ ]+).*/W : \1 = 1.0/){ print }' | sort -u >> $@

# Derive a reduced tally from genmodel/pw.wsjnphd.tally:
#   1. start the target with every line containing " NN ";
#   2. take every line containing "VBD", rewrite each "VBDvbd<suffix>" token to
#      "VBDvbd*", then split each line on "=", sum the right-hand values of
#      lines that share a left-hand side, and append the merged
#      "<lhs> = <sum>" lines to the target.
# Note: grep exits non-zero when nothing matches, so step 1 fails the recipe if
# the source tally has no " NN " line.
genmodel/pw.similaritynphd.tally: genmodel/pw.wsjnphd.tally
	grep ' NN ' $< > $@
	grep 'VBD' $< | perl -p -e 's/VBDvbd([^* ]*\*?)/VBDvbd*/g' | perl -e 'while(<>){chop;@line=split("=");$$T{$$line[0]}+=$$line[1];} foreach $$t(keys %T){print "$$t = $$T{$$t}\n";}' >> $@

