
/*******************************************************************/
/*                                                                 */
/*     File: nbest.C                                               */
/*   Author: Helmut Schmid                                         */
/*  Purpose: Computation and output of the n best parse trees      */
/*  Created: Wed Jul 12 09:11:47 2006                              */
/* Modified: Thu Jan 15 10:11:20 2009 (schmid)                     */
/* Copyright: Institut fuer maschinelle Sprachverarbeitung         */
/*            Universitaet Stuttgart                               */
/*                                                                 */
/*******************************************************************/

#include "quote.h"
#include "parser.h"

/* One entry of the n-best list: a parse identified by its root node and */
/* by the list of positions at which a non-first edge is selected.       */
class NBestParse {
public:
  Prob prob;                    // probability assigned to this parse
  Node root;                    // root node of the parse
  // "next change at depth": values of the running node counter used by the
  // tree traversals in this file; each occurrence of a value moves the edge
  // selected at the corresponding node one step further in the sorted list.
  vector<unsigned short> ncd;

  // pp is only copied into ncd, hence it is taken by const reference;
  // callers passing a non-const vector keep working unchanged.
  NBestParse( Prob p, Node r, const vector<unsigned short> &pp )
    : prob(p), root(r), ncd(pp) {}
};

class BestParses {
  const size_t nbest;

public:
  vector<NBestParse*> parse;
  
  bool add( Prob p, Node root, vector<unsigned short> &pp ) {

    if (p == (Prob)0.0 ||
	(parse.size() == nbest && parse.back()->prob >= p))
      return false;
    // add the new analysis in sorted order
    if (parse.size() < nbest)
      parse.push_back( NULL );
    size_t i;
    for( i=parse.size()-1; i>0; i-- )
      if (parse[i-1]->prob < p)
	parse[i] = parse[i-1];
      else
	break;
    parse[i] = new NBestParse(p, root, pp);
    return true;
  }

  BestParses( size_t n ) : nbest(n) {}
  ~BestParses() {
    for( size_t i=0; i<parse.size(); i++ )
      delete parse[i];
  }
};


/*******************************************************************/
/*                                                                 */
/*  add_parses                                                     */
/*                                                                 */
/*******************************************************************/

/* Traverses parse number parsenum of bp below node and tries to add     */
/* variants of it to bp which differ in one additional edge selection.   */
/*   node     - root of the subtree visited by this call                 */
/*   ncd      - positions of the non-first edge selections of the parse; */
/*              extended temporarily and restored before returning       */
/*   index    - number of entries of ncd consumed so far (in/out)        */
/*   depth    - running node counter shared by all recursive calls       */
/*   parsenum - index in bp.parse of the parse which is being extended   */
/*   bp       - the n-best list receiving the new parses                 */
/* Returns true if a new parse was added to bp.                          */
static bool add_parses( Node node, vector<unsigned short> &ncd, 
			size_t &index, unsigned short &depth, size_t parsenum, 
			BestParses &bp )
{
  // depth is passed on by reference and only ever incremented here, so
  // it numbers the nodes in the order in which they are visited
  ++depth;

  // collect the edges of the node and sort them with Edge's comparison
  vector<Edge> alternatives;
  for( Node::iterator a=node.begin(); a!=node.end(); ++a )
    alternatives.push_back( *a );
  sort( alternatives.begin(), alternatives.end() );

  if (index != ncd.size()) {
    // Some entries of the position list are still unread: follow the edge
    // which the parse uses at this node. Each entry equal to the current
    // counter value moves the selection to the next edge.
    size_t rank = 0;
    while (index < ncd.size() && ncd[index] == depth) {
      ++index;
      ++rank;
    }
    Edge &chosen = alternatives[rank];

    // NOTE(review): the loop stops at the first daughter which reports a
    // new parse; the remaining daughters are not visited in that case.
    // The other branch below visits all daughters - confirm that the
    // early return is intended.
    for( Edge::iterator d=chosen.begin(); 
         d!=chosen.end() && index<=ncd.size(); ++d )
      if (add_parses( *d, ncd, index, depth, parsenum, bp ))
        return true;
    return false;
  }

  // The whole position list has been read: every other edge of this node
  // yields a candidate parse. The candidate for edge k carries k entries
  // with the current counter value, hence ncd grows inside the loop and
  // is cut back to its old length afterwards.
  const size_t saved_size = ncd.size();
  Prob rest = bp.parse[parsenum]->prob / node.prob();
  bool added = false;
  for( size_t k=1; k<alternatives.size(); ++k ) {
    ncd.push_back( depth );
    if (!bp.add( rest * alternatives[k].prob(), bp.parse[parsenum]->root, ncd ))
      break;   // rejected by the list; the remaining edges are not tried
    added = true;
  }
  ncd.resize( saved_size );

  // continue below the first edge; every daughter is visited, also when
  // an earlier one has already produced a new parse
  Edge &first = alternatives[0];
  for( Edge::iterator d=first.begin(); d!=first.end(); ++d )
    if (add_parses( *d, ncd, index, depth, parsenum, bp ))
      added = true;
  return added;
}


/*******************************************************************/
/*                                                                 */
/*  print_nbest_node                                               */
/*                                                                 */
/*******************************************************************/

/* Prints the part of a parse below node in bracketed form to file.      */
/*   ncd, index, depth - select the edge used at each node: every entry  */
/*              of ncd equal to the running node counter depth moves     */
/*              the selection one edge further in the sorted edge list   */
/*   rn, tpos, dpos - rule number and positions handed to print_trace    */
/*              after the node was printed; dpos is incremented first    */
/*   file     - output stream                                            */
static void print_nbest_node( Node node, vector<unsigned short> &ncd, 
			      size_t &index, unsigned short &depth, 
			      int rn, size_t &tpos, size_t &dpos, FILE *file)
{
  // depth is shared by all recursive calls and counts the visited nodes
  ++depth;

  // auxiliary nodes produce no bracket and no label of their own
  if (!node.is_aux())
    fprintf( file, "(%s ", quote(node.symbol_name()));

  // collect the edges of the node and sort them with Edge's comparison
  vector<Edge> alternatives;
  for( Node::iterator a=node.begin(); a!=node.end(); ++a )
    alternatives.push_back( *a );
  sort( alternatives.begin(), alternatives.end() );

  // determine the edge which this parse uses at the current node
  size_t rank = 0;
  while (index < ncd.size() && ncd[index] == depth) {
    ++index;
    ++rank;
  }
  Edge &chosen = alternatives[rank];

  if (chosen.is_terminal())
    fputs( quote(chosen.word()), file );
  else if (node.is_aux()) {
    // the daughters of an auxiliary node go on with the rule number and
    // the position counters received from the caller
    for( Edge::iterator d=chosen.begin(); d!=chosen.end(); ++d )
      print_nbest_node( *d, ncd, index, depth, rn, tpos, dpos, file );
  }
  else {
    // a regular node starts fresh position counters for the rule of the
    // selected edge
    size_t trace_pos = 0;
    size_t daughter_pos = 0;
    int rule = chosen.source_rule_number();
    node.my_parser()->grammar.traces.print_trace( rule, trace_pos, 
                                                  daughter_pos, file );
    for( Edge::iterator d=chosen.begin(); d!=chosen.end(); ++d )
      print_nbest_node( *d, ncd, index, depth, rule, trace_pos, 
                        daughter_pos, file );
  }

  if (!node.is_aux())
    fputc( ')', file );

  // the node is complete: advance the caller's daughter position and let
  // print_trace act on the new position of the caller's rule
  node.my_parser()->grammar.traces.print_trace( rn, tpos, ++dpos, file );
}


/*******************************************************************/
/*                                                                 */
/*  Parser::print_nbest_parses                                     */
/*                                                                 */
/*******************************************************************/

void Parser::print_nbest_parses( FILE *file )

{
  if (parse.number_of_nodes() == 0)
    failure_output( file );

  else {
    BestParses bp( NBest );
    vector<unsigned short> next_change_at_depth;

    // add the best parse tree for each top-most category
    for( iterator it=begin(); it!=end(); ++it ) {
      Node root = *it;
      bp.add( root.prob(), root, next_change_at_depth );
    }
    
    // add the next best parse trees
    for( size_t i=0; i<bp.parse.size(); i++ ) {
      next_change_at_depth = bp.parse[i]->ncd;
      size_t index=0;
      unsigned short depth=0;
      add_parses( bp.parse[i]->root, next_change_at_depth, index, depth, i, bp);
    }
    
    // print the n best parse trees
    for( size_t i=0; i<bp.parse.size(); i++ ) {
      if (ViterbiProbs)
	fprintf(file, "vitprob=%g\n", (double)bp.parse[i]->prob);
      size_t index=0;
      unsigned short depth=0;
      size_t tp=0, dp=0;
      int rn=0;
      print_nbest_node( bp.parse[i]->root, bp.parse[i]->ncd,
			index, depth, rn, tp, dp, file);
      fputc( '\n', file );
    }
  }

  fputc( '\n', file );
}
