

# Root of the parsed WSJ treebank; the rules below read
# $(DATA_DIR)/<section>/*.mrg from it.
DATA_DIR := /project/nlp/data/treebank/parsed/mrg/wsj
# Directory holding the evalb executable and its COLLINS.prm parameter file.
EVALB_DIR := /project/nlp/tmill/working/timrec/EVALB

# Convenience aliases for the real file targets defined further down.
# They are declared phony because they name actions, not files: this keeps
# make from running implicit-rule search on them (a stray test.c or test.sh
# would otherwise be turned into a file called "test") and from being confused
# by a file or directory that happens to share one of these names.
.PHONY: training-data train test-data test

# Build the concatenated training treebank.
training-data: wsj-02-21.mrg

# Build the trained model file.
train: wsj-02-21.obj.gz

# Build the cleaned-up section 23 treebank.
test-data: wsj-23.mrg

# Run the parser over the section 23 input.
test: wsj-23.in.parsed

# Concatenate the .mrg files of sections 02-09, 10-19 and 20-21 under
# $(DATA_DIR) into one training treebank file.
# The output goes to a temporary file and is renamed only after cat succeeds,
# so a failed cat (for example an unreadable $(DATA_DIR)) cannot leave behind
# a truncated target that make would then treat as up to date.
wsj-02-21.mrg:
	cat $(DATA_DIR)/0[2-9]/*.mrg $(DATA_DIR)/1[0-9]/*.mrg $(DATA_DIR)/2[01]/*.mrg > $@.tmp
	mv -f $@.tmp $@


# Run bin/train on the concatenated training treebank ($< is wsj-02-21.mrg)
# using the settings in settings/collins.properties.
# NOTE(review): the output file is not named on the command line; presumably
# bin/train derives wsj-02-21.obj.gz from the input name - confirm.
# NOTE(review): the meaning of the leading 800 argument is not visible here.
wsj-02-21.obj.gz: wsj-02-21.mrg
	bin/train 800 settings/collins.properties $<

# Build the section 23 treebank file in a normalised layout.
# Steps, all applied to a temporary file that is renamed onto the target only
# when every step has succeeded (so an interrupted or failed run never leaves
# a half-processed target that looks up to date):
#   1. concatenate every .mrg file of section 23 (run as its own step so that
#      a failing cat stops the build instead of being hidden behind a pipe);
#   2. delete every newline, joining the whole file into a single line;
#   3. start a new line in front of every "( (S";
#   4. drop the empty line that step 3 creates before the first match;
#   5. strip leading spaces and squeeze runs of spaces into a single space.
# NOTE(review): step 3 only splits where the text "( (S" occurs, so a tree
# whose top label does not start with S stays on the previous tree's line -
# confirm this is acceptable for the data.
wsj-23.mrg:
	cat $(DATA_DIR)/23/*.mrg > $@.tmp
	perl -pi -e 's/\n//g' $@.tmp
	perl -pi -e 's/\( \(S/\n( (S/g' $@.tmp
	perl -pi -e 's/^\n$$//g' $@.tmp
	perl -pi -e 's/^ *//g;s/  */ /g;' $@.tmp
	mv -f $@.tmp $@

# Derive the parser input from the normalised section 23 treebank.
# Work is done on a temporary copy that is renamed onto the target only after
# every step has succeeded, so a failure part-way through cannot leave a
# partially stripped file that make would consider up to date.
# Steps:
#   1. copy the treebank file ($<);
#   2. delete every "(" together with the non-space characters that follow
#      it, and every ")", which removes the bracketing and labels;
#   3. wrap each line in a single pair of parentheses;
#   4. delete every "*" plus the non-space characters after it and the
#      following space (presumably trace / empty-element tokens - confirm);
#   5. replace each " 0 " with a single space (presumably the null element
#      token - confirm).
wsj-23.in: wsj-23.mrg
	cp $< $@.tmp
	perl -pi -e 's/\([^ ]*//g;s/\)//g' $@.tmp
	perl -pi -e 's/^(.*)$$/(\1)/g' $@.tmp
	perl -pi -e 's/\*[^ ]* //g' $@.tmp
	perl -pi -e 's/ 0 / /g' $@.tmp
	mv -f $@.tmp $@

# Run bin/parse with settings/collins.properties; $^ expands to the two
# prerequisites in order, i.e. the model file followed by the input file.
# NOTE(review): the output file is not named on the command line; presumably
# bin/parse writes wsj-23.in.parsed next to its input - confirm.
# NOTE(review): the meaning of the leading 400 argument is not visible here.
wsj-23.in.parsed: wsj-02-21.obj.gz wsj-23.in
	bin/parse 400 settings/collins.properties $^

# Score the parser output against the section 23 treebank with evalb, using
# the COLLINS.prm parameter file from $(EVALB_DIR).
# Declared phony because it names an action rather than a file: otherwise a
# file or directory called "eval" that is newer than the prerequisites would
# cause the scoring to be skipped as already up to date.
.PHONY: eval
eval: wsj-23.in.parsed wsj-23.mrg
	$(EVALB_DIR)/evalb -p $(EVALB_DIR)/COLLINS.prm wsj-23.mrg wsj-23.in.parsed

