package translator;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import utils.Params;
import edu.illinois.cs.cogcomp.annotation.handler.IllinoisChunkerHandler;
import edu.illinois.cs.cogcomp.core.utilities.ResourceManager;
import edu.illinois.cs.cogcomp.core.utilities.commands.CommandDescription;
import edu.illinois.cs.cogcomp.core.utilities.commands.InteractiveShell;
import edu.illinois.cs.cogcomp.edison.sentences.Constituent;
import edu.illinois.cs.cogcomp.edison.sentences.TextAnnotation;
import edu.illinois.cs.cogcomp.edison.sentences.TreeView;
import edu.illinois.cs.cogcomp.edison.sentences.ViewNames;
import edu.illinois.cs.cogcomp.nlp.pipeline.IllinoisCachingPreprocessor;
import edu.illinois.cs.cogcomp.nlp.pipeline.IllinoisPreprocessor;

public class EnglishPrime {
	
	// Running counters of how many clauses each rewrite rule in modifyEnglish
	// has transformed. They are incremented there and printed at the end of
	// genEngPrimeFile; nothing in this file resets them, so they accumulate
	// across calls.

	/** Number of clauses rewritten by Rule 1 (split at the token "to"). */
	public static int rule1 = 0;
	/** Number of clauses rewritten by Rule 2 (split at the first VBN-tagged token). */
	public static int rule2 = 0;
	/** Number of clauses rewritten by Rule 3 (verb chunk starting with a listed modal). */
	public static int rule3 = 0;
	
	/**
	 * Returns a new list holding the values of {@code list} with repeats dropped.
	 *
	 * <p>A value is kept only when it is strictly greater than the value kept
	 * most recently, so for an ascending input this removes duplicates. The
	 * input list itself is not modified.
	 *
	 * @param list values to filter, expected in ascending order
	 * @return a fresh list containing the kept values in their original order
	 */
	public static List<Integer> removeDuplicatedFromSortedList(List<Integer> list) {
		List<Integer> kept = new ArrayList<>();
		for (Integer value : list) {
			// The first value is always accepted; later ones must exceed the last kept value.
			boolean accept = kept.isEmpty() || kept.get(kept.size() - 1) < value;
			if (accept) {
				kept.add(value);
			}
		}
		return kept;
	}
	
	/**
	 * Produces the reordered ("English prime") form of one annotated sentence.
	 *
	 * <p>The sentence is cut into segments at token 0, at {@code ta.size()}, and
	 * at the start and end of every parse constituent labelled {@code S} or
	 * {@code SBAR} whose first token is not "to". The last token of the sentence
	 * is excluded from the final segment and appended unchanged at the very end.
	 *
	 * <p>Within each segment the first {@code VP} chunk that lies inside the
	 * segment (start at or after the segment start, end strictly before the
	 * segment end) and matches one of the rules below is moved: the output is
	 * the tokens before a pivot, then the tokens after the chunk, then the
	 * tokens from the pivot to the end of the chunk. At most one chunk is moved
	 * per segment.
	 * <ul>
	 *   <li>Rule 1: pivot is the first token equal to "to" inside the chunk.</li>
	 *   <li>Rule 2: pivot is the first token tagged {@code VBN} inside the chunk.</li>
	 *   <li>Rule 3: the chunk starts with can/could/may/might/will/would; the
	 *       pivot is the token right after that word.</li>
	 * </ul>
	 * Each applied rule increments the matching static counter
	 * ({@code rule1}, {@code rule2}, {@code rule3}).
	 *
	 * @param ta      annotation providing the tokens and the original text
	 * @param posTags POS constituents, indexed by token position
	 * @param chunks  shallow-parse chunks; only those labelled {@code VP} are used
	 * @param parse   parse-tree constituents; only {@code S}/{@code SBAR} are used
	 * @return {@code ta.getText()} when no rule applied (or the annotation has no
	 *         tokens); otherwise the reordered tokens, each followed by one space
	 */
	public static String modifyEnglish(
			TextAnnotation ta,
			List<Constituent> posTags,
			List<Constituent> chunks, 
			List<Constituent> parse) {
		// Without tokens there is nothing to reorder, and the final-token
		// append below would ask for index -1.
		if (ta.size() == 0) {
			return ta.getText();
		}
		List<String> modals = Arrays.asList(
				"can", "could", "may", "might", "will", "would");

		// Find segmentation into clauses
		List<Integer> boundaries = new ArrayList<Integer>();
		boundaries.add(0);
		boundaries.add(ta.size());
		for (Constituent cons : parse) {
			String label = cons.getLabel();
			if ((label.equals("SBAR") || label.equals("S"))
					&& !ta.getToken(cons.getStartSpan()).equals("to")) {
				boundaries.add(cons.getStartSpan());
				boundaries.add(cons.getEndSpan());
			}
		}
		// Sort the indices and remove duplicates
		Collections.sort(boundaries);
		boundaries = removeDuplicatedFromSortedList(boundaries);

		StringBuilder engPrime = new StringBuilder();
		boolean anyModification = false;
		int lastSegment = boundaries.size() - 2;
		for (int i = 0; i <= lastSegment; ++i) {
			int start = boundaries.get(i);
			int end = boundaries.get(i + 1);
			if (i == lastSegment) {
				// The sentence-final token is held back and appended after the loop.
				end--;
			}
			boolean modified = false;
			for (Constituent cons : chunks) {
				int chunkStart = cons.getStartSpan();
				int chunkEnd = cons.getEndSpan();
				if (!cons.getLabel().equals("VP") || chunkStart < start
						|| chunkEnd >= end) {
					continue;
				}
				// Rule 1 : Break at "to"
				int pivot = indexOfToken(ta, "to", chunkStart, chunkEnd);
				if (pivot >= 0) {
					rule1++;
				} else {
					// Rule 2 : Reorder for participles
					pivot = indexOfPosTag(posTags, "VBN", chunkStart, chunkEnd);
					if (pivot >= 0) {
						rule2++;
					} else if (modals.contains(ta.getToken(chunkStart))) {
						// Rule 3 : the modal stays in place, the rest of the chunk moves
						pivot = chunkStart + 1;
						rule3++;
					}
				}
				if (pivot < 0) {
					continue;
				}
				appendTokens(engPrime, ta, start, pivot);
				appendTokens(engPrime, ta, chunkEnd, end);
				appendTokens(engPrime, ta, pivot, chunkEnd);
				modified = true;
				anyModification = true;
				// Each clause can be transformed at most once
				break;
			}
			if (!modified) {
				appendTokens(engPrime, ta, start, end);
			}
		}
		if (!anyModification) {
			return ta.getText();
		}
		engPrime.append(ta.getToken(ta.size() - 1)).append(' ');
		return engPrime.toString();
	}

	/** Returns the first index in [from, to) whose token equals {@code token}, or -1. */
	private static int indexOfToken(TextAnnotation ta, String token, int from, int to) {
		for (int j = from; j < to; ++j) {
			if (ta.getToken(j).equals(token)) {
				return j;
			}
		}
		return -1;
	}

	/** Returns the first index in [from, to) whose POS label equals {@code tag}, or -1. */
	private static int indexOfPosTag(List<Constituent> posTags, String tag, int from, int to) {
		for (int j = from; j < to; ++j) {
			if (posTags.get(j).getLabel().equals(tag)) {
				return j;
			}
		}
		return -1;
	}

	/** Appends tokens [from, to) of {@code ta} to {@code out}, each followed by a space. */
	private static void appendTokens(StringBuilder out, TextAnnotation ta, int from, int to) {
		for (int j = from; j < to; ++j) {
			out.append(ta.getToken(j)).append(' ');
		}
	}

	/**
	 * Reads {@code inFile} line by line and writes the transformed form of each
	 * line to {@code outFile}.
	 *
	 * <p>Blank lines are written as empty lines. If annotating a line or fetching
	 * any of its views throws, the line is written unchanged. Otherwise the
	 * result of {@link #modifyEnglish} is written. After the last line the three
	 * rule counters are printed to standard output; they are static, so the
	 * numbers include anything counted before this call.
	 *
	 * <p>Both files are closed even when processing fails part-way.
	 * NOTE(review): the preprocessor cache opened via {@code openCache()} is not
	 * closed here; whether it needs an explicit close cannot be told from this
	 * file.
	 *
	 * @param inFile  path of the input text file, one sentence per line
	 * @param outFile path of the file to (over)write with the results
	 * @throws Exception on I/O failure or if preprocessing setup fails
	 */
	@CommandDescription(description = "Generate english prime file")
	public static void genEngPrimeFile(String inFile, String outFile) 
			throws Exception {
		try (BufferedReader br = new BufferedReader(new FileReader(
					new File(inFile)));
				BufferedWriter bw = new BufferedWriter(new FileWriter(
					new File(outFile)))) {

			IllinoisCachingPreprocessor prep = IllinoisCachingPreprocessor.getInstance(
					new ResourceManager(Params.preprocessorConfig), null);
			IllinoisCachingPreprocessor.openCache();

			String line;
			while ((line = br.readLine()) != null) {
				if (line.trim().equals("")) {
					bw.write("\n");
					continue;
				}
				TextAnnotation ta;
				List<Constituent> posTags, chunks, parse;
				try {
					ta = prep.getCachedTextAnnotation(line);
					posTags = ta.getView(ViewNames.POS).getConstituents();
					chunks = ta.getView(ViewNames.SHALLOW_PARSE).
									getConstituents();
					parse = ((TreeView) ta.getView(ViewNames.PARSE_STANFORD)).
									getConstituents();
				} catch (Exception e) {
					// Deliberate best effort: a line that cannot be annotated
					// is passed through unchanged so output stays line-aligned
					// with the input.
					bw.write(line+"\n");
					continue;
				}
				bw.write(modifyEnglish(ta, posTags, chunks, parse)+"\n");
			}
			System.out.println("Rule1 : "+rule1);
			System.out.println("Rule2 : "+rule2);
			System.out.println("Rule3 : "+rule3);
		}
	}
	
	/**
	 * Interactive check of {@link #modifyEnglish} on a single sentence.
	 *
	 * <p>Prints a prompt, reads one line from standard input, annotates it with
	 * a freshly built {@code IllinoisPreprocessor}, and prints the parse tree
	 * followed by the transformed sentence.
	 *
	 * @throws Exception if reading the input or annotating the sentence fails
	 */
	public static void testSingleSentence() throws Exception {
		System.out.println("Input : ");
		BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
		String sentence = stdin.readLine();

		ResourceManager config = new ResourceManager(Params.preprocessorConfig);
		IllinoisPreprocessor preprocessor = new IllinoisPreprocessor(config);
		TextAnnotation annotation =
				preprocessor.processTextToTextAnnotation("", "", sentence, false);

		// Views are fetched in the same order as before: POS, chunks, then parse.
		List<Constituent> tags = annotation.getView(ViewNames.POS).getConstituents();
		List<Constituent> shallowChunks =
				annotation.getView(ViewNames.SHALLOW_PARSE).getConstituents();
		TreeView stanfordParse = (TreeView) annotation.getView(ViewNames.PARSE_STANFORD);
		List<Constituent> parseNodes = stanfordParse.getConstituents();

		String result = modifyEnglish(annotation, tags, shallowChunks, parseNodes);
		System.out.println("Parse Tree : \n" + stanfordParse.getTree(0));
		System.out.println("\nModified : \n" + result);
	}

	/**
	 * Command-line entry point. With arguments, they are handed to the
	 * interactive shell as a command to run; with none, the shell's
	 * documentation is shown instead.
	 *
	 * @param args command name followed by its arguments, or empty
	 * @throws Exception propagated from the shell or the invoked command
	 */
	public static void main(String[] args) throws Exception {
		InteractiveShell<EnglishPrime> shell =
				new InteractiveShell<EnglishPrime>(EnglishPrime.class);
		if (args.length > 0) {
			shell.runCommand(args);
			return;
		}
		shell.showDocumentation();
	}

}
