// main.cpp
//
// by William M. Darling (c) 2010
//
// This is the main file for SyntaxSum. It creates an object of the syntaxsum
// class and runs Gibbs sampling to estimate the topic-word probabilities
// and the syntax-word probabilities, so that we can perform summarization
// using our topic-syntax model.
//
// Once this Gibbs sampler has been run to convergence, the output files can
// then be used with the Python summarizer scripts syntax_sumA.py and
// syntax_sumB.py.

#include <iostream>
#include <cstdlib>
#include "syntaxsum.h"

// Sampler configuration.
//
// These are good settings for DUC 2006. For other datasets, change the number
// of topics (to match the number of document sets) and the number of documents
// per document set for the particular corpus. The number of states minimally
// affects the summarization results for syntax_sum.
//
// The values are typed constants rather than preprocessor macros so that they
// obey normal scoping rules and are type-checked where they are used. A const
// object at namespace scope has internal linkage, so these names stay local to
// this translation unit.
static const double BETA       = 0.01;	// first hyperparameter passed to syntaxsum::setup()
static const double GAMMA      = 0.01;	// second hyperparameter passed to syntaxsum::setup()
static const int    ITERATIONS = 1000;	// number of Gibbs sampling iterations to run
static const int    NUM_TOPICS = 50;	// number of topics (match the number of document sets)
static const int    NUM_STATES = 5;	// not including state for content words or state for end-of-sentence
static const int    DPS        = 25;	// number of documents per document set for this corpus

// Program entry point.
//
// Expects two command-line arguments, shown in the usage message as
// <word_stream> and <doc_stream> (presumably the input files for the sampler;
// their format is defined by syntaxsum::setup(), not here). Any additional
// arguments are ignored.
//
// Configures a syntaxsum object with the constants defined at the top of this
// file, runs the sampler for ITERATIONS iterations, and asks it to write its
// output.
//
// Returns 0 on success, or 1 (after printing a usage message to standard
// error) when fewer than two arguments are supplied.
int main(int argc, char **argv)
{
	if (argc < 3) {
		// argv[0] is allowed to be null when argc is 0, so never stream it blindly.
		const char *program = (argc > 0 && argv[0] != NULL) ? argv[0] : "syntaxsum";

		// <iostream> is already included by this file; the previous fprintf call
		// relied on <cstdio> being pulled in indirectly.
		std::cerr << "usage:\t" << program << " <word_stream> <doc_stream>\n";

		// Returning from main yields the same exit status as exit(1).
		return 1;
	}

	const char *WS = argv[1];
	const char *DS = argv[2];

	// Create the sampler. It stays heap-allocated and value-initialized exactly
	// as before, because its size and constructor are not visible from this file.
	syntaxsum *SyntaxSum = new syntaxsum();

	// Set up the sampler with the hyperparameters, inputs and corpus dimensions.
	SyntaxSum->setup(BETA, GAMMA, WS, DS, NUM_TOPICS, NUM_STATES, DPS);

	// Run Gibbs sampling.
	SyntaxSum->run(ITERATIONS);

	// Write the topic and syntax distribution files.
	SyntaxSum->write();

	// Clean up.
	delete SyntaxSum;

	return 0;
}
