################################################################################
#
#                   Semantic Role Labeling sub-project
#                  in wsjparse Makefile: include srl.mk
#
################################################################################

# Directory holding the WSJ treebank .mrg files; a recipe in this file reads
# $(WSJTREEDATA)/<stem>/*.mrg.  Several recipes re-invoke make with
# WSJTREEDATA overridden on the command line.
# Alternative location (disabled):
#WSJTREEDATA = genmodel/propbanktrees/parsed/mrg/wsj
WSJTREEDATA = /project/nlp/data/treebank/parsed/mrg/wsj

# NOTE(review): ID_OR_H is not referenced in this part of the file;
# presumably it is consumed elsewhere -- confirm.
ID_OR_H = id

#genmodel/wsj%.trees: $(WSJTREEDATA) scripts/tbtrees2trees.pl ##scripts/propParent.rb
#	make genmodel
#	cat $(WSJTREEDATA)/$*/*.mrg | perl scripts/tbtrees2trees.pl > $@

#.PRECIOUS: %-pu.bintrees.leaf.srl %-pu.bintrees.srl %-pu.bintrees.synrole %-pu.bintrees.m0
#%-pu.bintrees: %.trees scripts/treebinarize.pl scripts/mergeSynRoleTrees.rb scripts/pushUpSrlLabels.rb
#	make genmodel/propbanktrees/wsj $*.trees
#	cat $*.trees | perl scripts/treebinarize.pl > $@.leaf.srl
#	cat $@.leaf.srl | ruby scripts/pushUpSrlLabels.rb > $@.srl
#	cat $@.srl | sed 's/(REL\!ldelim\!/(r!ldelim!/g;s/(ARGM[A-Za-z0-9\-]*\!ldelim\!/(m!ldelim!/g' | sed 's/(ARG[A-Za-z0-9\-]*\!ldelim\!/(a!ldelim!/g;s/(ID\!ldelim\!/(id!ldelim!/g;' > $@.synrole
#	cat $@.synrole | ruby scripts/createM0.rb > $@.m0
#	cat $@.m0 | sed 's/(m0\!ldelim\!/(m!ldelim!/g' | ruby scripts/mergeSynRoleTrees.rb > $@.merged
#	cat $@.merged | cut -f4- -d" " > $@
	#cat $@.merged | ruby scripts/shallowSem.rb -v > $@

# Soundness test of sem -> pbr on gold parsed trees: score the hypothesis
# labels against the gold labels with scripts/eval-srl.rb.
# Example goal: genmodel/wsj22-pu.sem2pbr.gold.parse.eval
# The rule declares no prerequisites (the %.sem2pbr.model dependency is
# disabled), so make does not build $*.gold.pbr or $*.hypoth.pbr for it.
%.sem2pbr.gold.parse.eval:        #%.sem2pbr.model
	#compare $*.gold.pbr $*.hypoth.pbr
	ruby scripts/eval-srl.rb \
	    $*.gold.pbr \
	    $*.hypoth.pbr \
	    > $@

# Build the gold label file used by the sem -> pbr soundness test.
# NOTE(review): the input is hard-wired to section 22 under
# genmodel/propbanktrees/parsed/mrg/wsj and to genmodel/wsj22.trees whatever
# the stem is; only the later intermediates are named after the stem.
# Side files written: genmodel/wsj22.trees, $*.bintrees, $*.argtrees,
# $*.gaptrees and $*.ucnftrees.
%.gold.pbr:
	cat genmodel/propbanktrees/parsed/mrg/wsj/22/*.mrg | perl scripts/tbtrees2trees.pl > genmodel/wsj22.trees
	cat genmodel/wsj22.trees | perl scripts/treebinarize.pl > $*.bintrees
	cat $*.bintrees | ruby scripts/propArgs.rb > $*.argtrees
	cat $*.argtrees | perl scripts/propGaps.pl > $*.gaptrees
# Rewrite each innermost "(CAT word)" as "(CAT CAT#word)"; only lines where a
# rewrite happened are printed and passed on to trees2cnftrees.pl.
	cat $*.gaptrees | perl -ne 's/\(([^ ]*) *([^()#]*) *\)/(\1 \1\#\2)/g && print' | perl scripts/trees2cnftrees.pl > $*.ucnftrees
# Drop the extrNP lines, then keep only the ":label)" tokens of each line,
# strip the ':' and ')' characters and shorten every REL-xxx to REL.
	cat $*.ucnftrees | grep -v "(S-extrNP )" | grep -v "(NP-extrNP )" | ruby -ne 'print $$_.scan(/:[^:)]*\)/).join(" "), "\n"' | sed 's/[:)]//g;s/REL\-[^ ]*/REL/g' > $*.gold.pbr

# Hypothesis labels for the "a0_for_m_sibling" variant.
# Reads $*.a0_for_m_sibling.ucnftrees and
# genmodel/srl.a0_for_m_sibling.sem2pbr.model; neither is declared as a
# prerequisite.  Side files written under the $*.a0_for_m_sibling prefix:
# .semroletrees_a, .semroletrees_am and .pbrtrees.
%.a0_for_m_sibling.hypoth.pbr:
# Drop the extrNP lines (grep out line 24513, 36261) and strip the :NULL,
# :ARG[0-5]... and :ARGM-... annotations (keep only REL).
	cat $*.a0_for_m_sibling.ucnftrees \
	  | grep -v -e "(S-extrNP )" -e "(NP-extrNP )" \
	  | sed -e 's/:NULL//g' -e 's/:ARG[0-5][^)]*//g' -e 's/:ARGM\-[^)]*//g' \
	  | ruby scripts/propSemRoles.rb \
	  > $*.a0_for_m_sibling.semroletrees_a
	cat $*.a0_for_m_sibling.semroletrees_a \
	  | ruby scripts/calcHdwdTree-srl.rb \
	  > $*.a0_for_m_sibling.semroletrees_am
	ruby scripts/sem2PBR.rb \
	    -m genmodel/srl.a0_for_m_sibling.sem2pbr.model \
	    -s $*.a0_for_m_sibling.semroletrees_am \
	    > $*.a0_for_m_sibling.pbrtrees
# Keep only the ":label)" tokens of each line, then strip ':' and ')'.
	cat $*.a0_for_m_sibling.pbrtrees \
	  | ruby scripts/pushDownPBRLabels.rb \
	  | ruby -ne 'print $$_.scan(/:[^:)]*\)/).join(" "), "\n"' \
	  | sed 's/[:)]//g' \
	  > $@

# Hypothesis labels produced with genmodel/srl.sem2pbr.model.
# Reads $*.ucnftrees and the model file; neither is declared as a
# prerequisite.  Side files written: $*.semroletrees_a, $*.semroletrees_am
# and $*.pbrtrees.
%.hypoth.pbr:
# Drop the extrNP lines (grep out line 24513, 36261) and strip the :NULL,
# :ARG[0-5]... and :ARGM-... annotations (keep only REL).
	cat $*.ucnftrees \
	  | grep -v -e "(S-extrNP )" -e "(NP-extrNP )" \
	  | sed -e 's/:NULL//g' -e 's/:ARG[0-5][^)]*//g' -e 's/:ARGM\-[^)]*//g' \
	  | ruby scripts/propSemRoles.rb \
	  > $*.semroletrees_a
	cat $*.semroletrees_a \
	  | ruby scripts/calcHdwdTree-srl.rb \
	  > $*.semroletrees_am
	ruby scripts/sem2PBR.rb \
	    -m genmodel/srl.sem2pbr.model \
	    -s $*.semroletrees_am \
	    > $*.pbrtrees
# Keep only the ":label)" tokens of each line, then strip ':' and ')'.
	cat $*.pbrtrees \
	  | ruby scripts/pushDownPBRLabels.rb \
	  | ruby -ne 'print $$_.scan(/:[^:)]*\)/).join(" "), "\n"' \
	  | sed 's/[:)]//g' \
	  > $@

#### Trees annotated with semantic roles (a0, a1, a2, etc.).
# Lines containing "(S-extrNP )" or "(NP-extrNP )" are filtered out of the
# .gaptrees input before scripts/propSemRoles.rb runs
# (original note: grep out line 24513, 36261).
.PRECIOUS: %.semroletrees
%.semroletrees: %.gaptrees scripts/propSemRoles.rb
	cat $< \
	  | grep -v -e "(S-extrNP )" -e "(NP-extrNP )" \
	  | ruby scripts/propSemRoles.rb \
	  > $@

#### Unconverted CNF trees.
# The perl one-liner rewrites each innermost "(CAT word)" as "(CAT CAT#word)",
# leaving an optional leading "<tag>!semdelim!" on the bracket label only, and
# prints just the lines where at least one rewrite happened.
.PRECIOUS: %.ucnftrees
%.ucnftrees: %.semroletrees scripts/trees2cnftrees.pl
	cat $< \
	  | perl -ne 's/\((\w+\!semdelim\!)?([^ ]*) *([^()#]*) *\)/(\1\2 \2\#\3)/g && print' \
	  | perl scripts/trees2cnftrees.pl \
	  > $@

#### Headword-limited CNF trees obtained from unconverted CNF trees.  The count
#### cutoff parameter follows the '-hw' delimiter in the stem: for
#### wsjTRAIN-pu-hw1000 the input is wsjTRAIN-pu.ucnftrees and the value handed
#### to the script's -b option is 1000.
# NOTE(review): the $$(...) prerequisite relies on secondary expansion being
# enabled before this rule is read; that is not visible in this part of the
# file -- confirm.
.PRECIOUS: %.hcnftrees
%.hcnftrees: $$(word 1,$$(subst -hw, ,%)).ucnftrees scripts/calcHdwdTree-srl.rb
	cat $< \
	  | ruby $(word 2,$^) -b $(word 2,$(subst -hw, ,$*)) \
	  | sed -e 's/( /(/g' -e 's/ )/)/g' -e 's/ )/)/g' \
	  | grep -v '^$$' \
	  > $@

#### Depth-sensitive CNF trees with terminals converted to lowercase.  The word
#### count cutoff parameter follows the '-cc' delimiter in the stem
#### (e.g. wsjTRAIN-pu-1khw-cc10): the text before '-cc' names the .hcnftrees
#### input and the text after it is passed to bin/remove-rare-cats.
# NOTE(review): the $$(...) prerequisite relies on secondary expansion being
# enabled before this rule is read; that is not visible in this part of the
# file -- confirm.
.PRECIOUS: %.dcnftrees
%.dcnftrees: $$(word 1,$$(subst -cc, ,%)).hcnftrees bin/expand-mod-relns bin/remove-rare-cats scripts/lowercasepreterm.rb scripts/cnftrees2cedepths.rb
# "!semdelim!" is turned into ':' first; lines matching '\^R,5' are dropped
# at the very end.
	cat $< \
	  | sed 's/\!semdelim\!/:/g' \
	  | bin/expand-mod-relns \
	  | bin/remove-rare-cats $(word 2,$(subst -cc, ,$*)) \
	  | ruby scripts/lowercasepreterm.rb \
	  | ruby scripts/cnftrees2cedepths.rb \
	  | grep -v '\^R,5' \
	  > $@




# Concatenate every .mrg file under $(WSJTREEDATA)/<stem>/ and convert the
# result with scripts/tbtrees2trees.pl.
genmodel/wsj%srltrees: scripts/tbtrees2trees.pl
	cat $(WSJTREEDATA)/$*/*.mrg \
	  | perl scripts/tbtrees2trees.pl \
	  > $@

# Training set: the concatenation of genmodel/wsj<sect>%trees for every <sect>
# in $(WSJTRAINSECTS) (defined outside this part of the file).  The recipe
# warns and waits five seconds first so the rebuild can be aborted.
# Disabled extra prerequisites: genmodel/wsjEOS$*trees, genmodel/eos.cnftrees
genmodel/wsjTRAIN-%.trees: $(addprefix genmodel/wsj,$(addsuffix %trees,$(WSJTRAINSECTS)))
	@echo "WARNING: undertaking major rebuild from '$@'!  Press CTRL-C to abort!"
	@sleep 5
	cat $^ \
	  > $@

# Intermediate tree files that make should not delete.  The last three
# patterns use the "pbr" spelling of the files that the %.sem2pbr.model recipe
# below actually reads and writes ($*.pbrtrees, $*.pbrsem_a_trees,
# $*.pbrsem_am_trees).
.PRECIOUS: %.bintrees %.argtrees %.gaptrees %.ucnftrees %.pbrtrees %.pbrsem_a_trees %.pbrsem_am_trees

# Train the sem -> pbr model from $*.pbrtrees.
# The steps that used to regenerate $*.pbrtrees are disabled below and the rule
# has no prerequisites, so $*.pbrtrees must already be present.
# Side files written: $*.pbrsem_a_trees and $*.pbrsem_am_trees.
%.sem2pbr.model: #genmodel/wsjTRAIN-srl.trees
#	make genmodel/wsjTRAIN-srl.trees WSJTREEDATA=genmodel/propbanktrees/parsed/mrg/wsj
#	cat genmodel/wsjTRAIN-srl.trees | perl scripts/treebinarize.pl > $*.bintrees
#	cat $*.bintrees | ruby scripts/propArgs.rb > $*.argtrees
#	cat $*.argtrees | perl scripts/propGaps.pl > $*.gaptrees
#	cat $*.gaptrees | perl -ne 's/\(([^ ]*) *([^()#]*) *\)/(\1 \1\#\2)/g && print' | perl scripts/trees2cnftrees.pl > $*.ucnftrees
#	cat $*.ucnftrees | ruby scripts/pushUpPBRLabels.rb > $*.pbrtrees
	cat $*.pbrtrees | ruby scripts/propSemRoles.rb > $*.pbrsem_a_trees
	cat $*.pbrsem_a_trees | ruby scripts/calcHdwdTree-srl.rb > $*.pbrsem_am_trees
	cat $*.pbrsem_am_trees | ruby scripts/sem2PBRTrain.rb | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl > $@


# Derive the parser input ($*.sents.srl) and the gold role labels
# ($*.gold.srl) from $*.sents.
# The recipe never writes $@ itself, so the rule runs again on every request
# unless a file with the target's name happens to exist.
.PRECIOUS: %.sents
%.srl:
# Recurse through $(MAKE), not a literal "make", so the sub-make is the same
# make program and takes part in -n, -j and the jobserver.
	$(MAKE) $*.sents WSJTREEDATA=genmodel/propbanktrees/wsj
	#cat $*.sents | sed -e 's/ [^:]* / /g' | sed -e 's/\(.*\)REL-\([^ ]*\) \(.*\)/\2 \1REL \3/' | sed -e 's/\([^ ]*\):\([^ ]*\)/\1/g' > $*.sents.srl
# Input side: move the text between "REL-" and the first '.' to the front of
# the line, replace the whole REL-... token by REL, and keep only the part
# before ':' of each "x:y" token.
	cat $*.sents | sed -e 's/ [^:]* / /g' | sed -e 's/\(.*\)REL-\([^\.]*\)\(\.[^ ]*\) \(.*\)/\2 \1REL \4/' | sed -e 's/\([^ ]*\):\([^ ]*\)/\1/g' > $*.sents.srl
	#cat $*.sents | sed -e 's/ [^:]* / /g' | sed -e 's/\(.*\)REL-\([^ ]*\) \(.*\)/\2 \1REL1 \3/' | sed -e 's/\([^\.]*\)\... \(.*\)/\1 \2/' | sed -e '/REL-/{s/\([^ ]*\) \(.*\) \([^ ]*\):REL1 \(.*\)/\1-\3 \2 \3:REL2 \4/;}'  | sed -e 's/\([^ ]*\):\([^ ]*\)/\1/g' > $*.sents.srl
	# cut ARG0-TO to just ARG0 etc.
# Gold side: keep only the part after ':' of each "x:y" token, with every
# REL-... shortened to REL.
	cat $*.sents | sed -e 's/ [^:]* / /g' | sed -e 's/REL-\([^ ]*\)/REL/g' | sed -e 's/\([^ ]*\):\([^ ]*\)/\2/g' | sed -e 's/ARG\([^\-]\)-[^ ]*/ARG\1/g' > $*.gold.srl

# Evaluate SRL parser output against the gold labels.
# $*.hypoth.raw and $*.gold.srl are read but not declared as prerequisites;
# the parser invocation that would create $*.hypoth.raw is disabled.
# Side file written: $*.hypoth.srl.
#.PRECIOUS: hhmm.srl-t2m.wsjpu.dec09.500.wsj23pu.hypoth.raw
.PRECIOUS: %.hypoth.raw
%.srl.eval:
	#cat $*.sents.srl | bin/hhmmparser-srl genmodel/srl-t2m.wsjpu.model genmodel/POS.wsjpu.model 2>/dev/null > $*.hypoth.raw
# NIL and ARGM-AUX are both rewritten to NULL before scoring.
	cat $*.hypoth.raw \
	  | perl scripts/hypoths2rctrees-srl.pl \
	  | ./scripts/trees2words.sh \
	  | sed -e 's/NIL/NULL/g' -e 's/ARGM-AUX/NULL/g' \
	  > $*.hypoth.srl
	ruby scripts/eval-srl.rb \
	    $*.gold.srl \
	    $*.hypoth.srl \
	    > $@

# (Re)build the genmodel/propbanktrees/parsed/mrg/wsj directory.
# Destructive: after a warning and a 5-second pause the existing directory is
# removed with rm -rf before anything is regenerated.
# $(PROPTXT) is defined outside this part of the file.
genmodel/propbanktrees/parsed/mrg/wsj:
	@echo 'Do you really want to re-build this big $@ file?  If not, CTRL-C and copy it from somewhere!'
	@sleep 5 
	rm -rf $@
	mkdir -p $@/EOS 
# NOTE(review): the output of tbtrees2srltrees.rb is not redirected here;
# presumably the script writes its files itself -- confirm.
	cat $(PROPTXT) | ruby scripts/tbtrees2srltrees.rb -v 1 -p
# Every "eos" in srcmodel/EOS.trees becomes "eos:NULL" in EOS/eos.mrg.
	cat srcmodel/EOS.trees | sed 's/eos/eos:NULL/g' > $@/EOS/eos.mrg
	#cat $(PROPTXT) | ruby scripts/tbtrees2srltrees.rb -v 2
	#cat srcmodel/EOS.trees | sed 's/S/ID:S/g' | sed 's/END/ID:END/g' > $@/EOS/eos.mrg

# Build the srl-t2m model file for data set "%".
# The first pipeline creates $@ from genmodel/all.$*.crctrees; the later steps
# append W entries, two fixed eos entries and the Pw entries derived from
# genmodel/pw.$*.tally.  $(COUNTCUTOFF) is defined outside this part of the
# file.
.PRECIOUS: genmodel/srl-t2m.%.model
genmodel/srl-t2m.%.model: genmodel/propbanktrees/wsj
# Recurse through $(MAKE), not a literal "make", so the sub-makes are the same
# make program and take part in -n, -j and the jobserver.
	$(MAKE) genmodel/all.$*.bintrees WSJTREEDATA=genmodel/propbanktrees/wsj
	$(MAKE) genmodel/all.$*.crctrees WSJTREEDATA=genmodel/propbanktrees/wsj
# Drop lines matching '\^[5-9]', strip the remaining ^<digit> marks and turn a
# line-final "(. .#.:NULL))" into "(. .#.:NIL))" before counting.
	cat  genmodel/all.$*.crctrees | grep -v '\^[5-9]' | sed 's/\^[0-9]//g' | sed 's/(\. \.#\.:NULL))$$/(\. \.#\.:NIL))/g;' | ruby scripts/trees2dat-srl.rb | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl > $@
#
	$(MAKE) genmodel/pw.$*.tally WSJTREEDATA=genmodel/propbanktrees/wsj
# One "W : <word> = 1.0" line for each distinct word that follows "Pw ".
	cat  genmodel/pw.$*.tally | awk -F':' '{ print $$1 " :" $$3 }' | perl scripts/relfreq.pl -c $(COUNTCUTOFF) | perl -n -e 'if(s/^Pw ([^ ]+).*/W : \1 = 1.0/){ print }' | sort -u >> $@
	# this line Pw eos : END = 102 does not have arg for eos, keep the Pw and W model, **** but it hurt when having these 2 lines *****
	echo "W : eos = 1.0" >> $@
	echo "Pw eos : END = 1" >> $@
	cat  genmodel/pw.$*.tally | awk -F':' '{ print $$1 " :" $$3 }' | perl scripts/relfreq.pl -c $(COUNTCUTOFF) | perl scripts/sortbyprob.pl >> $@
#
#	cat $(PROPTXT) |awk -F' ' '{ print "Rel null : " $$5 }' | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl >> $@
#	cat $(PROPTXT) |cut -f5 -d' ' | sort -u | awk '{ print "Rel " $$1 " : " $$1 " = 1.000000" }' >> $@

# Variant of the srl-t2m model built with "trees2dat-srl.rb -m -c".
# The first pipeline creates $@ from genmodel/all.$*.crctrees; the later steps
# append W entries, four fixed eos/end entries and the Pw entries derived from
# genmodel/pw.$*.tally.  $(COUNTCUTOFF) is defined outside this part of the
# file.
.PRECIOUS: genmodel/srl-t2m.%.model.no.main.arg.ext
genmodel/srl-t2m.%.model.no.main.arg.ext: genmodel/propbanktrees/wsj
# Recurse through $(MAKE), not a literal "make", so the sub-makes are the same
# make program and take part in -n, -j and the jobserver.
	$(MAKE) genmodel/all.$*.bintrees WSJTREEDATA=genmodel/propbanktrees/wsj
	$(MAKE) genmodel/all.$*.crctrees WSJTREEDATA=genmodel/propbanktrees/wsj
# The first write truncates the target ('>', as in the two sibling model
# rules) so a rebuild never appends to the contents of a stale file.
	cat  genmodel/all.$*.crctrees | grep -v '\^[5-9]' | sed 's/\^[0-9]//g' | sed 's/(\. \.#\.:NULL))$$/(\. \.#\.:NIL))/g;' | ruby scripts/trees2dat-srl.rb -m -c | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl > $@
#
	$(MAKE) genmodel/pw.$*.tally WSJTREEDATA=genmodel/propbanktrees/wsj
# One "W : <word> = 1.0" line for each distinct word that follows "Pw ".
	cat  genmodel/pw.$*.tally | awk -F':' '{ print $$1 " :" $$3 }' | perl scripts/relfreq.pl -c $(COUNTCUTOFF) | perl -n -e 'if(s/^Pw ([^ ]+).*/W : \1 = 1.0/){ print }' | sort -u >> $@
	# this line Pw eos : END = 102 does not have arg for eos, keep the Pw and W model, **** but it hurt when having these 2 lines *****
	echo "W : eos = 1.0" >> $@
	echo "Pw eos : END = 1" >> $@
	echo "Ge 1 REST END : end = 1" >> $@
	echo "Pc end : END = 1" >> $@
	cat  genmodel/pw.$*.tally | awk -F':' '{ print $$1 " :" $$3 }' | perl scripts/relfreq.pl -c $(COUNTCUTOFF) | perl scripts/sortbyprob.pl >> $@
#
#	cat $(PROPTXT) |awk -F' ' '{ print "Rel null : " $$5 }' | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl >> $@
#	cat $(PROPTXT) |cut -f5 -d' ' | sort -u | awk '{ print "Rel " $$1 " : " $$1 " = 1.000000" }' >> $@
# The smoothing step is temporarily commented out:
#	ruby scripts/smooth-srl.rb -m genmodel/gf-t2m.wsjpu.model.fromTim .1 $@ .9 | ruby scripts/smooth-srl-A-to-G.rb $@ genmodel/top200G .1 | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl > $@.smooth

# Variant of the srl-t2m model built with "trees2dat-srl.rb -ne".
# The first pipeline creates $@ from genmodel/all.$*.crctrees; the later steps
# append W entries, four fixed eos/end entries and the Pw entries derived from
# genmodel/pw.$*.tally.  $(COUNTCUTOFF) is defined outside this part of the
# file.
genmodel/srl-t2m.%.model.no.arg.ext: genmodel/propbanktrees/wsj
# Recurse through $(MAKE), not a literal "make", so the sub-makes are the same
# make program and take part in -n, -j and the jobserver.
	$(MAKE) genmodel/all.$*.bintrees WSJTREEDATA=genmodel/propbanktrees/wsj
	$(MAKE) genmodel/all.$*.crctrees WSJTREEDATA=genmodel/propbanktrees/wsj
# Drop lines matching '\^[5-9]', strip the remaining ^<digit> marks and turn a
# line-final "(. .#.:NULL))" into "(. .#.:NIL))" before counting.
	cat  genmodel/all.$*.crctrees | grep -v '\^[5-9]' | sed 's/\^[0-9]//g' | sed 's/(\. \.#\.:NULL))$$/(\. \.#\.:NIL))/g;' | ruby scripts/trees2dat-srl.rb -ne | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl > $@
#
	$(MAKE) genmodel/pw.$*.tally WSJTREEDATA=genmodel/propbanktrees/wsj
# One "W : <word> = 1.0" line for each distinct word that follows "Pw ".
	cat  genmodel/pw.$*.tally | awk -F':' '{ print $$1 " :" $$3 }' | perl scripts/relfreq.pl -c $(COUNTCUTOFF) | perl -n -e 'if(s/^Pw ([^ ]+).*/W : \1 = 1.0/){ print }' | sort -u >> $@
	# this line Pw eos : END = 102 does not have arg for eos, keep the Pw and W model, **** but it hurt when having these 2 lines *****
	echo "W : eos = 1.0" >> $@
	echo "Pw eos : END = 1" >> $@
	echo "Ge 1 REST END : end = 1" >> $@
	echo "Pc end : END = 1" >> $@
	cat  genmodel/pw.$*.tally | awk -F':' '{ print $$1 " :" $$3 }' | perl scripts/relfreq.pl -c $(COUNTCUTOFF) | perl scripts/sortbyprob.pl >> $@
#
#	cat $(PROPTXT) |awk -F' ' '{ print "Rel null : " $$5 }' | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl >> $@
#	cat $(PROPTXT) |cut -f5 -d' ' | sort -u | awk '{ print "Rel " $$1 " : " $$1 " = 1.000000" }' >> $@
#	ruby scripts/smooth-srl.rb -ne genmodel/gf-t2m.wsjpu.model.fromTim .1 $@ .9 | perl scripts/relfreq.pl | perl scripts/sortbyprob.pl > $@.smooth
