
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Properties;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
//import org.w3c.dom.xpath.XPathExpression;
import org.xml.sax.SAXException;

import edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TextAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation;
import edu.stanford.nlp.trees.TypedDependency;
import edu.stanford.nlp.util.CoreMap;

public class ACE_DataReader 
{	
	/** CoreNLP pipeline used by {@link #process_sentence} and {@link #process_document}. */
	StanfordCoreNLP pipeline;

	/**
	 * Creates a reader whose CoreNLP pipeline runs the annotators
	 * {@code tokenize, ssplit, pos, depparse}.
	 */
	public ACE_DataReader()
	{
		Properties pipelineConfig = new Properties();
		pipelineConfig.setProperty("annotators", "tokenize, ssplit, pos, depparse");
		this.pipeline = new StanfordCoreNLP(pipelineConfig);
	}

	/**
	 * Converts every {@code .sgm} document in an input folder (together with its sibling
	 * {@code .apf.xml} annotation file) into tab-separated text files.
	 *
	 * <p>Files written under the output directory:
	 * <ul>
	 *   <li>{@code relations.txt} - one line per relation mention: relation id, relation type,
	 *       then mention id / entity type / mention type for Arg-1 and for Arg-2;</li>
	 *   <li>{@code events.txt} - one line per event mention: id, ldc_scope text, anchor text;</li>
	 *   <li>{@code mention_id_to_complete_entity_mention.txt} - mention id and its extent text;</li>
	 *   <li>one file per document with one token per line (the seven columns produced by
	 *       {@link #process_document}) and a blank line after every sentence.</li>
	 * </ul>
	 *
	 * @param args {@code args[0]} is the input folder, {@code args[1]} the output directory
	 */
	public static void main(String[] args) 
	{
		if(args.length < 2)
		{
			System.err.println("Usage: ACE_DataReader <input_folder> <output_base_dir>");
			return;
		}
		String baseDir = args[1];
		File inputFolder = new File(args[0]);
		File[] listOfFiles = inputFolder.listFiles();
		if(listOfFiles == null)
		{
			// listFiles() returns null when the path is not a readable directory.
			System.err.println("Cannot list input folder: " + inputFolder.getPath());
			return;
		}

		ACE_DataReader adr = new ACE_DataReader();
		Pattern speaker_pattern = Pattern.compile("<SPEAKER>(.+?)</SPEAKER>");
		int num_mentions_missed = 0;
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		// Accumulated over all documents and dumped once at the end.
		HashMap<String, String> entity_mention_id_to_complete_mention = new HashMap<String, String>();

		// try-with-resources: the shared writers are flushed and closed even if a document fails.
		try(FileWriter outputRelationFile = new FileWriter(baseDir + "/relations.txt");
			FileWriter outputEventFile = new FileWriter(baseDir + "/events.txt");
			FileWriter outputMappingFile = new FileWriter(baseDir + "/mention_id_to_complete_entity_mention.txt"))
		{
			int file_cnt = 1;
			for(File inputFile : listOfFiles)
			{
				if(!inputFile.getAbsolutePath().endsWith(".sgm"))
					continue;
				System.out.println("File Number : " + file_cnt++);

				String input_APF_file_name = inputFile.getAbsolutePath().replaceAll("sgm$", "apf.xml");

				// Load the APF annotation file; a document that cannot be parsed is skipped
				// instead of aborting the whole run with a NullPointerException.
				Document document;
				try(FileInputStream apfStream = new FileInputStream(input_APF_file_name))
				{
					DocumentBuilder builder = factory.newDocumentBuilder();
					document = builder.parse(apfStream);
				}
				catch(ParserConfigurationException | SAXException | IOException e)
				{
					e.printStackTrace();
					System.err.println("Skipping " + inputFile.getName() + ": cannot parse " + input_APF_file_name);
					continue;
				}

				XPath xpath = XPathFactory.newInstance().newXPath();
				write_relations(document, xpath, outputRelationFile);
				write_events(document, xpath, outputEventFile);

				// Read the whole source document; an empty file yields an empty string.
				String content = "";
				try(Scanner sgmScanner = new Scanner(inputFile))
				{
					sgmScanner.useDelimiter("\\Z");
					if(sgmScanner.hasNext())
						content = sgmScanner.next();
				}

				HashSet<String> speakers = new HashSet<String>();
				Matcher m_speaker_pattern = speaker_pattern.matcher(content);
				while(m_speaker_pattern.find())
				{
					speakers.add(m_speaker_pattern.group(1).trim());
				}

				// A marker sentence is injected at the end of the headline so that the sentences
				// up to and including the headline can be recognised and left out of the output.
				String headline_marker = " zzzzzzz. ";
				content = content.replaceAll("</HEADLINE>", headline_marker + "</HEADLINE>");
				content = content.replaceAll("<[^<]+>", "");

				boolean checkForHL = content.contains("zzzzzzz");
				// Every annotated offset is shifted by the marker length when a marker was inserted.
				// NOTE(review): this also shifts mentions located before the marker - confirm intended.
				int headline_shift = checkForHL ? headline_marker.length() : 0;

				HashMap<String, String> start_end_to_entity_type = new HashMap<String, String>();
				HashMap<Integer, Integer> start_to_end = new HashMap<Integer, Integer>();
				HashMap<String, String> start_end_to_entity_mention_id = new HashMap<String, String>();
				NodeList entity_text_nodes = (NodeList) xpath.compile("//document/entity/entity_mention/head/charseq")
						.evaluate(document, XPathConstants.NODESET);
				// Paired with the head nodes by position.
				// NOTE(review): assumes every entity_mention has exactly one head and one extent.
				NodeList entity_text_nodes_complete = (NodeList) xpath.compile("//document/entity/entity_mention/extent/charseq")
						.evaluate(document, XPathConstants.NODESET);

				for(int i = 0;i<entity_text_nodes.getLength();i++)
				{
					Node entity_text_node = entity_text_nodes.item(i);
					Node entity_text_node_complete = entity_text_nodes_complete.item(i);
					String complete_entity_mention_text = entity_text_node_complete == null
							? "" : entity_text_node_complete.getTextContent();

					// charseq -> head -> entity_mention -> entity
					Node entity_mention_node = entity_text_node.getParentNode().getParentNode();
					String entity_type = ((Element)entity_mention_node.getParentNode()).getAttribute("TYPE");
					String entity_mention_id = ((Element)entity_mention_node).getAttribute("ID");

					int start_index = Integer.parseInt(((Element)entity_text_node).getAttribute("START")) + headline_shift;
					int end_index = Integer.parseInt(((Element)entity_text_node).getAttribute("END")) + headline_shift;
					String start_end = start_index + "_" + end_index;

					if(!start_end_to_entity_type.containsKey(start_end))
					{
						start_end_to_entity_type.put(start_end, entity_type);
						start_end_to_entity_mention_id.put(start_end, entity_mention_id);
						if(!start_to_end.containsKey(start_index))
							start_to_end.put(start_index, end_index);
						else
							System.out.println("Multiple entity types starting from " + start_index);
					}
					else if(!start_end_to_entity_type.get(start_end).equals(entity_type))
					{
						// Report only a genuine conflict: same span, different entity type.
						System.out.println("Multiple entity types for " + start_end);
						System.out.println("\t" + start_end_to_entity_type.get(start_end) + "\t" + entity_type);
					}

					entity_mention_id_to_complete_mention.put(entity_mention_id, complete_entity_mention_text);
				}

				System.out.println("#Entity Mentions Annotated : " + entity_mention_id_to_complete_mention.keySet().size() + "\t" + start_end_to_entity_type.keySet().size());

				String start_end_key = "";
				int valid_till = -1;
				HashSet<String> covered_start_end = new HashSet<String>();
				// Separate sentences glued together as "word.Word" so the sentence splitter sees them.
				String content_temp = content.replaceAll("([a-z]{2,})\\.([A-Za-z]{2,})", "$1. $2");
				ArrayList<String[][]> al_sentences = adr.process_document(content_temp, speakers);

				// Walk the raw text and the token stream in parallel: i is a character offset,
				// (sno, wno) the current sentence/token. A token found at offset i inherits the
				// entity annotation that starts at i (or that is still open until valid_till).
				for(int i = 0, sno=0, wno = 0;i<content.length();)
				{
					if(i > valid_till)
						valid_till = -1;
					while(sno < al_sentences.size() && wno >= al_sentences.get(sno).length)
					{
						sno = sno + 1;
						wno = 0;
					}
					if(sno >= al_sentences.size())
						break;
					int wno_before = wno;
					try
					{
						String curr_word = al_sentences.get(sno)[wno][0];
						// Bounded look-ahead window; an unbounded substring overran near the end of the text.
						String lookahead = content.substring(i, Math.min(i + 5, content.length()));
						// Undo tokenizer normalisations so the token can be located in the raw text.
						if(curr_word.equals("-LRB-"))
							curr_word = "(";
						else if(curr_word.equals("-RRB-"))
							curr_word = ")";
						else if(curr_word.equals("-LSB-"))
							curr_word = "[";
						else if(curr_word.equals("-RSB-"))
							curr_word = "]";
						else if(curr_word.equals("-LCB-"))
							curr_word = "{";
						else if(curr_word.equals("-RCB-"))
							curr_word = "}";
						else if(curr_word.equals("``") && !lookahead.contains("``"))
							curr_word = "\"";
						else if(curr_word.equals("''") && !lookahead.contains("''"))
							curr_word = "\"";
						else if(curr_word.equals("'s"))
							curr_word = "s";
						else if(curr_word.equals("`") && !lookahead.contains("`"))
							curr_word = "'";
						else if(curr_word.contains("&") && !curr_word.equals("&") && content.contains("&amp;"))
							curr_word = curr_word.replaceAll("&", "&amp;");
						else if(curr_word.startsWith("7") && curr_word.endsWith("1/2"))
							curr_word = "7 1/2";
						else if(curr_word.startsWith("2") && curr_word.endsWith("1/2"))
							curr_word = "2 1/2";
						else if(curr_word.equals("...") && (inputFile.getName().startsWith("FLOPPINGACES_20041116.0833.027")
								|| inputFile.getName().startsWith("FLOPPINGACES_20041228.0927.010")))
							curr_word = ". . .";
						else if(curr_word.equals(".") && wno > 0 
								&& al_sentences.get(sno)[wno-1][0].endsWith(".")
								&& !al_sentences.get(sno)[wno-1][0].equals("..."))
						{
							// Presumably a period the tokenizer added after a token that already
							// ends in "."; the token index is moved past it.
							// NOTE(review): curr_word is still "." below - confirm intended.
							wno = wno + 1;
						}

						// startsWith(prefix, offset) is bounds-safe and also matches a token that
						// ends exactly at the end of the content.
						if(content.startsWith(curr_word, i))
						{
							if(start_to_end.containsKey(i))
							{
								int end_index = start_to_end.get(i);
								valid_till = end_index;
								start_end_key = i + "_" + end_index;
								al_sentences.get(sno)[wno][5] = start_end_to_entity_type.get(start_end_key);
								al_sentences.get(sno)[wno][6] = start_end_to_entity_mention_id.get(start_end_key);
								covered_start_end.add(start_end_key);
							}
							else if(curr_word.contains("-") 
									&& (Character.isAlphabetic(curr_word.charAt(0)) 
											|| Character.isDigit(curr_word.charAt(0))))
							{
								// Hyphenated token: a mention may start right after the last hyphen.
								int temp_index = i+curr_word.lastIndexOf('-')+1;
								if(start_to_end.containsKey(temp_index))
								{
									int end_index = start_to_end.get(temp_index);
									valid_till = end_index;
									start_end_key = temp_index + "_" + end_index;
									al_sentences.get(sno)[wno][5] = start_end_to_entity_type.get(start_end_key);
									al_sentences.get(sno)[wno][6] = start_end_to_entity_mention_id.get(start_end_key);
									covered_start_end.add(start_end_key);
								}
							}
							else if(i < valid_till)
							{
								// Still inside the most recently opened mention.
								al_sentences.get(sno)[wno][5] = start_end_to_entity_type.get(start_end_key);
								al_sentences.get(sno)[wno][6] = start_end_to_entity_mention_id.get(start_end_key);
								covered_start_end.add(start_end_key);
							}
							wno = wno + 1;
							i = i + curr_word.length();
						}
						else
						{
							i = i + 1;	
						}
					}
					catch(Exception etemp)
					{
						etemp.printStackTrace();
						// Guarantee progress: if nothing moved, retrying would fail the same way forever.
						if(wno == wno_before)
							i = i + 1;
					}
				}

				num_mentions_missed = num_mentions_missed + (start_end_to_entity_mention_id.keySet().size()-covered_start_end.size());
				System.out.println("#Entity Mentiones covered : " + covered_start_end.size());
				for(String start_end : start_end_to_entity_mention_id.keySet())
				{
					if(!covered_start_end.contains(start_end))
					{
						String[] bounds = start_end.split("_");
						int missed_start = Integer.parseInt(bounds[0]);
						// The stored end offset is inclusive; clamp so a bad offset cannot abort the run.
						int missed_end = Math.min(Integer.parseInt(bounds[1]) + 1, content.length());
						String missed_text = missed_start < missed_end ? content.substring(missed_start, missed_end) : "";
						System.out.println("Missed : " + start_end + "\t" + start_end_to_entity_mention_id.get(start_end) + "\t" + start_end_to_entity_type.get(start_end) + "\t" + missed_text);
					}
				}

				// NOTE(review): "X.sgm" becomes "X..txt" (double dot); kept because existing
				// consumers may rely on that name.
				String output_file_name = baseDir + "/" + inputFile.getName().replaceAll("sgm$", ".txt");
				System.out.println("outputFile=" + output_file_name);

				try(FileWriter outputFile = new FileWriter(output_file_name))
				{
					boolean HL_found = false;
					for(int i = 0;i<al_sentences.size();i++)
					{
						String[][] curr_sentence = al_sentences.get(i);
						// Sentences are written only after the headline marker sentence has been seen.
						if(!checkForHL || HL_found)
						{
							for(int j = 0; j<curr_sentence.length; j++)
							{
								StringBuilder temp_line = new StringBuilder();
								for(int k = 0;k<curr_sentence[j].length;k++)
									temp_line.append("\t").append(curr_sentence[j][k]);
								outputFile.write(temp_line.toString().trim() + "\n");
							}
						}
						if(curr_sentence.length > 1 
								&& curr_sentence[curr_sentence.length-2][0].equals("zzzzzzz"))
							HL_found = true;
						outputFile.write("\n");
					}
				}
			}

			for(String entity_mention_id : entity_mention_id_to_complete_mention.keySet())
			{
				outputMappingFile.write(entity_mention_id+"\t"+entity_mention_id_to_complete_mention.get(entity_mention_id).replaceAll("[\r\n]", " ") + "\n");
			}

			System.out.println("#Mentions missed : " + num_mentions_missed);
		}
		catch (Exception e)
		{
			e.printStackTrace();
		}
	}

	/**
	 * Writes one line per {@code relation_mention} of every {@code //document/relation}:
	 * relation id, relation type, then mention id, entity type and mention type of Arg-1
	 * followed by the same three fields of Arg-2. Also prints the number of relations.
	 */
	private static void write_relations(Document document, XPath xpath, FileWriter outputRelationFile)
			throws XPathExpressionException, IOException
	{
		NodeList relationNodes = (NodeList) xpath.compile("//document/relation").evaluate(document, XPathConstants.NODESET);
		System.out.println(relationNodes.getLength());
		for(int i = 0;i< relationNodes.getLength(); i++)
		{
			Node node = relationNodes.item(i);
			if(!(node instanceof Element))
				continue;
			Element relation = (Element) node;
			String relation_type = relation.getAttribute("TYPE");
			String relation_id = relation.getAttribute("ID");

			// Declared per relation: values carry over between the relation's mentions.
			String entity_type_1 = "", entity_type_2 = "";
			String entity_mention_id_1 = "", entity_mention_id_2 = "";
			String entity_mention_type_1 = "", entity_mention_type_2 = "";

			NodeList childNodes = relation.getChildNodes();
			for(int j = 0; j < childNodes.getLength(); j++)
			{
				Node childNode = childNodes.item(j);
				if(!(childNode instanceof Element))
					continue;
				Element child = (Element) childNode;
				if(child.getNodeName().equals("relation_argument"))
				{
					String role = child.getAttribute("ROLE");
					if(!role.equals("Arg-1") && !role.equals("Arg-2"))
						continue;
					String entity_id = child.getAttribute("REFID");
					Node entity_node = ((NodeList) xpath.compile("//document/entity[@ID='"+entity_id+"']")
							.evaluate(document, XPathConstants.NODESET)).item(0);
					if(role.equals("Arg-1"))
						entity_type_1 = attribute_of(entity_node, "TYPE");
					else
						entity_type_2 = attribute_of(entity_node, "TYPE");
				}
				else if(child.getNodeName().equals("relation_mention"))
				{
					NodeList mentionChildren = child.getChildNodes();
					for(int k = 0;k<mentionChildren.getLength();k++)
					{
						Node mentionChild = mentionChildren.item(k);
						if(!mentionChild.getNodeName().equals("relation_mention_argument"))
							continue;
						Element argument = (Element) mentionChild;
						String role = argument.getAttribute("ROLE");
						if(!role.equals("Arg-1") && !role.equals("Arg-2"))
							continue;
						String entity_mention_id = argument.getAttribute("REFID");
						Node entity_mention_node = ((NodeList) xpath.compile("//document/entity/entity_mention[@ID='"+entity_mention_id+"']")
								.evaluate(document, XPathConstants.NODESET)).item(0);
						if(role.equals("Arg-1"))
						{
							entity_mention_id_1 = entity_mention_id;
							entity_mention_type_1 = attribute_of(entity_mention_node, "TYPE");
						}
						else
						{
							entity_mention_id_2 = entity_mention_id;
							entity_mention_type_2 = attribute_of(entity_mention_node, "TYPE");
						}
					}
					outputRelationFile.write(relation_id + "\t" + relation_type + 
							"\t" + entity_mention_id_1 + "\t" + entity_type_1 + "\t" +entity_mention_type_1 +
							"\t" + entity_mention_id_2 + "\t" + entity_type_2 + "\t" +entity_mention_type_2 + "\n");
				}
			}
		}
	}

	/**
	 * Writes one line per {@code //document/event/event_mention}: mention id, the
	 * {@code ldc_scope} text (line breaks replaced by spaces, "." appended) and the
	 * {@code anchor} text. Also prints the number of event mentions.
	 */
	private static void write_events(Document document, XPath xpath, FileWriter outputEventFile)
			throws XPathExpressionException, IOException
	{
		NodeList eventMentions = (NodeList) xpath.compile("//document/event/event_mention").evaluate(document, XPathConstants.NODESET);
		System.out.println(eventMentions.getLength());
		for(int i = 0;i< eventMentions.getLength(); i++)
		{
			Node node = eventMentions.item(i);
			if(!(node instanceof Element))
				continue;
			String event_mention_id = ((Element) node).getAttribute("ID");
			String ldc_scope = "";
			String anchor = "";

			NodeList childNodes = node.getChildNodes();
			for(int j = 0; j < childNodes.getLength(); j++)
			{
				Node childNode = childNodes.item(j);
				if(!(childNode instanceof Element))
					continue;
				if(childNode.getNodeName().equals("ldc_scope"))
					ldc_scope = charseq_text(childNode, ldc_scope);
				else if(childNode.getNodeName().equals("anchor"))
					anchor = charseq_text(childNode, anchor);
			}
			ldc_scope = ldc_scope.replaceAll("[\r\n]+", " ") + ".";
			anchor = anchor.replaceAll("[\r\n]+", " ");
			outputEventFile.write(event_mention_id + "\t" + ldc_scope + "\t" + anchor + "\n");
		}
	}

	/** Returns the text of the last {@code charseq} child of {@code parent}, or {@code fallback} if there is none. */
	private static String charseq_text(Node parent, String fallback)
	{
		String text = fallback;
		NodeList children = parent.getChildNodes();
		for(int k = 0;k<children.getLength();k++)
		{
			Node child = children.item(k);
			if(child.getNodeName().equals("charseq"))
				text = child.getTextContent();
		}
		return text;
	}

	/** Returns the named attribute of {@code node}, or "" when {@code node} is null or not an element. */
	private static String attribute_of(Node node, String attribute)
	{
		return (node instanceof Element) ? ((Element) node).getAttribute(attribute) : "";
	}
	
	/**
	 * Annotates a single tokenised sentence with the CoreNLP pipeline.
	 *
	 * <p>The words are joined with single spaces and annotated; only the first sentence of
	 * the result is used. Each token yields one tab-separated line: lower-cased word, POS
	 * tag, governor and dependency relation (both "null" when no dependency targets the
	 * token). The token list and every produced line are also printed to standard output.
	 *
	 * @param curr_sentence words of the sentence, in order
	 * @return one line per token; whatever was collected so far if annotation fails
	 *         (the failure is printed to standard output)
	 */
	public ArrayList<String> process_sentence(ArrayList<String> curr_sentence)
	{
		StringBuilder joined = new StringBuilder();
		for(String token : curr_sentence)
			joined.append(" ").append(token);
		String sentence = joined.toString().trim();

		ArrayList<String> processedSent = new ArrayList<String>();
		ArrayList<String> tokenTexts = new ArrayList<String>();
		ArrayList<String> tokenTags = new ArrayList<String>();
		ArrayList<String> tokenGovernors = new ArrayList<String>();
		ArrayList<String> tokenRelations = new ArrayList<String>();
		try
		{
			Annotation annotated = new Annotation(sentence);
			pipeline.annotate(annotated);
			CoreMap firstSentence = annotated.get(SentencesAnnotation.class).get(0);

			for (CoreLabel label : firstSentence.get(TokensAnnotation.class))
			{
				tokenTexts.add(label.get(TextAnnotation.class).toLowerCase());
				tokenTags.add(label.get(PartOfSpeechAnnotation.class));
				// Placeholders, overwritten below for tokens that are a dependent.
				tokenGovernors.add("null");
				tokenRelations.add("null");
			}

			System.out.println(tokenTexts);

			SemanticGraph graph = firstSentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
			for(TypedDependency edge : graph.typedDependencies())
			{
				String relation = edge.reln().toString();
				String governor = edge.gov().toString().toLowerCase();
				// Dependent indices are used 1-based here, hence the -1.
				int slot = edge.dep().index()-1;
				tokenGovernors.set(slot, governor.split("/")[0]);
				tokenRelations.set(slot, relation);
			}

			int position = 0;
			while(position < tokenTexts.size())
			{
				String line = tokenTexts.get(position)+"\t"+tokenTags.get(position)+"\t"+tokenGovernors.get(position)+"\t"+tokenRelations.get(position);
				System.out.println(line);
				processedSent.add(line);
				position++;
			}
		}
		catch(Exception ex)
		{
			System.out.println(ex.toString());
			for(StackTraceElement frame : ex.getStackTrace())
				System.out.println(frame.toString());
		}
		return processedSent;
	}
	
	/**
	 * Splits a document into sentences with the CoreNLP pipeline and returns, per sentence,
	 * a table with one row per token and seven columns:
	 * word, POS tag, governor, dependency relation, governor index, and two columns
	 * initialised to "o" (filled in by the caller with entity type and mention id).
	 *
	 * <p>When the first token of a sentence is a known speaker name, that token is emitted
	 * as a one-row sentence of its own and the remaining tokens are re-annotated without it.
	 * A sentence that consists of the speaker token only produces just that row, and every
	 * sentence found in the re-annotated remainder is kept.
	 *
	 * @param doc_content plain document text (trimmed before annotation)
	 * @param speakers    speaker names to split off from the start of a sentence
	 * @return the sentence tables in document order; if annotation fails part-way the
	 *         failure is printed to standard output and the sentences collected so far
	 *         are returned
	 */
	public ArrayList<String[][]> process_document(String doc_content, HashSet<String> speakers)
	{
		ArrayList<String[][]> al_sentences = new ArrayList<String[][]>();
		doc_content = doc_content.trim();
		try
		{
			Annotation document = new Annotation(doc_content);
		   	pipeline.annotate(document);
		   	List<CoreMap> sentences = document.get(SentencesAnnotation.class);
		   	for(CoreMap currSent : sentences)
		   	{
		   		List<CoreLabel> tokens = currSent.get(TokensAnnotation.class);
		   		if(tokens.isEmpty())
		   			continue;

		   		String first_word = tokens.get(0).get(TextAnnotation.class);
		   		if(!speakers.contains(first_word))
		   		{
		   			al_sentences.add(sentence_to_rows(currSent));
		   			continue;
		   		}

		   		// The speaker name becomes a one-token sentence of its own.
		   		String[][] speaker_row = new String[1][7];
		   		speaker_row[0][0] = first_word;
		   		speaker_row[0][1] = tokens.get(0).get(PartOfSpeechAnnotation.class);
		   		speaker_row[0][2] = "root";
		   		speaker_row[0][3] = "root";
		   		speaker_row[0][4] = "-1";
		   		speaker_row[0][5] = "o";
		   		speaker_row[0][6] = "o";
		   		al_sentences.add(speaker_row);

		   		StringBuilder remainder = new StringBuilder();
		   		for(int i = 1;i<tokens.size();i++)
		   			remainder.append(" ").append(tokens.get(i).get(TextAnnotation.class));
		   		String true_sentence = remainder.toString().trim();
		   		// Nothing follows the speaker name: re-annotating an empty string yields no
		   		// sentence, which previously aborted processing of the rest of the document.
		   		if(true_sentence.isEmpty())
		   			continue;

		   		Annotation true_sent_annotation = new Annotation(true_sentence);
		   		pipeline.annotate(true_sent_annotation);
		   		// Keep every sentence of the remainder so that no token is dropped.
		   		for(CoreMap part : true_sent_annotation.get(SentencesAnnotation.class))
		   			al_sentences.add(sentence_to_rows(part));
		   	}
		}
		catch(Exception ex)
		{
			System.out.println(ex.toString());
			StackTraceElement[] st = ex.getStackTrace();
			for(int i = 0;i<st.length;i++)
			{
				System.out.println(st[i].toString());
			}
		}
		return (al_sentences);
	}

	/**
	 * Builds the seven-column token table for one annotated sentence. Governor, relation
	 * and governor index stay "null" for tokens that no dependency points to.
	 */
	private static String[][] sentence_to_rows(CoreMap sent)
	{
		List<CoreLabel> tokens = sent.get(TokensAnnotation.class);
		String[][] rows = new String[tokens.size()][7];
		for(int i = 0;i<tokens.size();i++)
		{
			rows[i][0] = tokens.get(i).get(TextAnnotation.class);
			rows[i][1] = tokens.get(i).get(PartOfSpeechAnnotation.class);
			rows[i][2] = "null";
			rows[i][3] = "null";
			rows[i][4] = "null";
			rows[i][5] = "o";
			rows[i][6] = "o";
		}

		SemanticGraph dependencies = sent.get(CollapsedCCProcessedDependenciesAnnotation.class);
		Collection<TypedDependency> tdList = dependencies.typedDependencies();
		for(TypedDependency td : tdList)
		{
			// Token indices are used 1-based here, hence the -1 for row and governor index.
			int dep_row = td.dep().index()-1;
			rows[dep_row][2] = td.gov().toString().toLowerCase().split("/")[0];
			rows[dep_row][4] = Integer.toString(td.gov().index()-1);
			rows[dep_row][3] = td.reln().toString();
		}
		return rows;
	}
	
	/**
	 * Checks that a sentence does not contain only one side of a relation.
	 *
	 * <p>{@code rel_arg1_ids} and {@code rel_arg2_ids} are parallel lists: position
	 * {@code i} holds the two argument mention ids of relation {@code i}. For every mention
	 * id of the sentence that occurs as an argument, the mention on the other side of the
	 * same relation must also be in the sentence. The first violation is printed to
	 * standard output ("Missing2: ..." when the Arg-2 partner is absent, "Missing1: ..."
	 * when the Arg-1 partner is absent).
	 *
	 * @param rel_arg1_ids            Arg-1 mention id per relation
	 * @param rel_arg2_ids            Arg-2 mention id per relation
	 * @param sent_entity_mention_ids mention ids present in the sentence
	 * @return {@code false} at the first missing partner; {@code true} otherwise, including
	 *         when an exception occurs during the check (its stack trace is printed)
	 */
	public static boolean check_whether_proper_sentence(ArrayList<String> rel_arg1_ids, ArrayList<String> rel_arg2_ids, ArrayList<String> sent_entity_mention_ids)
	{
		try
		{
			for(String mention : sent_entity_mention_ids)
			{
				// Relations in which the mention is Arg-1: the Arg-2 partner must be present.
				if(rel_arg1_ids.contains(mention))
				{
					for(int idx = 0; idx < rel_arg1_ids.size(); idx++)
					{
						if(!rel_arg1_ids.get(idx).equals(mention))
							continue;
						String partner = rel_arg2_ids.get(idx);
						if(sent_entity_mention_ids.contains(partner))
							continue;
						System.out.println("Missing2: " + mention + "\t" + partner);
						return false;
					}
				}

				// Relations in which the mention is Arg-2: the Arg-1 partner must be present.
				if(rel_arg2_ids.contains(mention))
				{
					for(int idx = 0; idx < rel_arg2_ids.size(); idx++)
					{
						if(!rel_arg2_ids.get(idx).equals(mention))
							continue;
						String partner = rel_arg1_ids.get(idx);
						if(sent_entity_mention_ids.contains(partner))
							continue;
						System.out.println("Missing1: " + partner + "\t" + mention);
						return false;
					}
				}
			}
		}
		catch(Exception ex)
		{
			ex.printStackTrace();
		}
		return true;
	}
}
