
package edu.umn.cs.nlp.textproc;

import java.util.ArrayList;
import java.util.TreeSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.Vector;
import java.io.File;

/**
 * Command-line filter that reads one tree per line from standard input and
 * rewrites the category of the most frequent verbs so that it carries the verb itself.
 *
 * Usage: the optional first argument is the number of distinct verbs to
 * lexicalize. When it is absent, zero, or negative, every input line is echoed
 * to standard output unchanged.
 */
public class VerbCounter{

    /**
     * Reports whether a node has exactly one child and that child has no
     * children of its own. The two public methods below treat the child's
     * category of such a node as the word being counted or lexicalized.
     */
    private static boolean hasSingleLeafChild(Tree tree){
        return tree.children.size() == 1 && tree.children.get(0).children.size() == 0;
    }

    /**
     * Walks the tree and tallies verbs into the supplied map.
     *
     * A node is counted when it has a single childless child and its own
     * category starts with "V"; the key is the child's category (presumably the
     * word token -- confirm against Tree). Nodes with a single childless child
     * are never descended into; all other nodes are recursed through.
     *
     * @param tree root of the (sub)tree to scan
     * @param map  running tally, updated in place: key to number of occurrences
     */
    public static void countVerbs(Tree tree, Map<String, Integer> map){
        if(hasSingleLeafChild(tree)){
          if(tree.cat.startsWith("V")){
            String word = tree.children.get(0).getCat();
            Integer seen = map.get(word);
            map.put(word, seen == null ? 1 : seen + 1);
          }
        }else{
          for(Tree t : tree.children){
            countVerbs(t, map);
          }
        }
    }


    /**
     * Rewrites, in place, the category of every selected verb node in the tree.
     *
     * Children are processed before the node itself. A node with a single
     * childless child whose category starts with "V" and whose child's category
     * is in the given set gets the new category
     * (old category) + "head" + (child's category).
     *
     * @param tree  root of the (sub)tree to modify
     * @param verbs the child categories that should be lexicalized
     */
    public static void lexicalizeVerbs(Tree tree, Set<String> verbs){
        for(Tree t : tree.children){
          lexicalizeVerbs(t, verbs);
        }

        if(hasSingleLeafChild(tree) && tree.cat.startsWith("V")){
          String word = tree.children.get(0).getCat();
          if(verbs.contains(word)){
            tree.setCategory(tree.cat + "head" + word);
          }
        }
    }


    /**
     * Entry point: parses the verb count, then either echoes standard input
     * (count of zero) or buffers every tree, picks the most frequent verbs,
     * lexicalizes them and prints each tree to standard output.
     *
     * @param args optional; args[0] is the number of verbs to lexicalize
     */
    public static void main(String[] args){
        Scanner scanner = new Scanner(System.in);
        int numLex = 0;

        if(args.length > 0){
          try{
            numLex = Integer.parseInt(args[0]);
          }catch(NumberFormatException e){
            System.err.println("Expected an integer verb count but got: " + args[0]);
            System.exit(1);
          }
          // A negative count used to slip past the "no lexicalization" path and
          // then lexicalize exactly one verb; treat it as zero instead.
          if(numLex < 0){
            numLex = 0;
          }
        }

        System.err.println("Lexicalizing the " + numLex + " most common verbs...");

        // Nothing to lexicalize: pass the input through untouched, without
        // building any trees.
        if(numLex == 0){
          while(scanner.hasNextLine()){
            System.out.println(scanner.nextLine());
          }
          System.err.println("Exiting early -- no lexicalization...");
          System.exit(0);
        }

        Map<String,Integer> map = new HashMap<String,Integer>();        // verb counts over all input trees
        TreeSet<VerbCountKey> sortedSet = new TreeSet<VerbCountKey>();  // verbs ordered by VerbCountKey.compareTo
        Set<String> verbs = new HashSet<String>();                      // the top N verbs that will be lexicalized
        ArrayList<Tree> trees = new ArrayList<Tree>();                  // every tree, kept for the second pass

        // First pass: build a tree from each input line and count the verbs it uses.
        // Every tree must be buffered because the top verbs are only known once
        // all input has been seen.
        while(scanner.hasNextLine()){
          Tree tree = new Tree(scanner.nextLine(), null);
          countVerbs(tree, map);
          trees.add(tree);
        }

        // Move the tallies into the sorted set so they can be read in order.
        for(Map.Entry<String,Integer> entry : map.entrySet()){
          VerbCountKey key = new VerbCountKey();
          key.verb = entry.getKey();
          key.count = entry.getValue();
          sortedSet.add(key);
        }

        // Take the first numLex entries in sorted order, logging each to stderr.
        int taken = 0;
        for(VerbCountKey key : sortedSet){
          System.err.println(key.verb + " => " + key.count);
          verbs.add(key.verb);
          if(++taken >= numLex){
            break;
          }
        }

        // Second pass: lexicalize and print each tree.
        for(Tree tree : trees){
          lexicalizeVerbs(tree, verbs);
          System.out.println(tree);
        }
    }
}

/**
 * A verb together with its occurrence count, ordered so that higher counts
 * come first and equal counts fall back to the natural ordering of the verb
 * strings.
 */
class VerbCountKey implements Comparable<VerbCountKey> {
  String verb;
  int count;

  /**
   * Compares by descending count, then by ascending verb.
   *
   * @param other the key to compare against
   * @return -1 if this count is larger, 1 if it is smaller, otherwise the
   *         result of comparing the two verb strings
   */
  public int compareTo(VerbCountKey other){
      if(count != other.count){
        // Larger counts sort earlier, hence the inverted sign.
        return (count > other.count) ? -1 : 1;
      }
      return verb.compareTo(other.verb);
  }
}


