package utils;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

/**
 * Static helpers for stripping diacritics from words and for simple
 * line-oriented, UTF-8 text file access.
 *
 * <p>The character classes used for the replacement are exposed as public
 * {@link StringBuilder} fields and are read again on every call to
 * {@link #removeDiacritics(String)}, so callers that extend them at runtime
 * keep working. Only the characters listed in the static initializer are
 * handled; any other character is left untouched.
 */
public class Utils 
{
	/** When true, {@link #getLines(String)} also prints the stack trace of a read failure. */
	public static boolean verbose = false;

	/** Regex character classes, one per replacement letter; filled in the static initializer. */
	public static StringBuilder aString = new StringBuilder();
	public static StringBuilder eString = new StringBuilder();
	public static StringBuilder iString = new StringBuilder();
	public static StringBuilder oString = new StringBuilder();
	public static StringBuilder uString = new StringBuilder();
	public static StringBuilder tString = new StringBuilder();
	public static StringBuilder sString = new StringBuilder();
	public static StringBuilder gString = new StringBuilder();
	public static StringBuilder zString = new StringBuilder();
	public static StringBuilder rString = new StringBuilder();
	public static StringBuilder cString = new StringBuilder();
	public static StringBuilder nString = new StringBuilder();
	public static StringBuilder yString = new StringBuilder();

	/** Charset used for every file read or written by this class. */
	private static final Charset UTF8 = Charset.forName("UTF8");

	/**
	 * build RegEx for discarding diacritics
	 *
	 * Values are decimal UTF-16 code units. A few entries are not lowercase
	 * accented Latin letters and are nevertheless mapped to the lowercase
	 * base letter: 212 (U+00D4), 218 (U+00DA), 490 (U+01EA) and 538 (U+021A)
	 * are uppercase letters, 8145 (U+1FD1) is a Greek iota with macron,
	 * 254 (U+00FE) is thorn and 186 (U+00BA) is the masculine ordinal sign.
	 */
	static
	{
		fillCharClass(aString, 259, 257, 261, 513, 228, 227, 226, 225, 224);
		fillCharClass(eString, 275, 277, 283, 281, 235, 234, 233, 232);
		fillCharClass(iString, 8145, 297, 301, 239, 238, 237, 299);
		fillCharClass(oString, 337, 466, 335, 333, 491, 490, 246, 244, 242, 243, 212);
		fillCharClass(uString, 369, 252, 250, 251, 249, 365, 363, 218);
		fillCharClass(tString, 539, 538, 355, 254);
		fillCharClass(sString, 537, 353, 347, 351, 186);
		fillCharClass(gString, 287);
		fillCharClass(nString, 324, 241);
		fillCharClass(cString, 263, 269, 231);
		fillCharClass(rString, 7773);
		fillCharClass(zString, 382);
		fillCharClass(yString, 253);
	}

	/**
	 * append a bracketed regex character class made of the given characters
	 * @param target builder receiving "[", the characters, then "]"
	 * @param codeUnits UTF-16 code units of the characters, in the order they are appended
	 */
	private static void fillCharClass(StringBuilder target, int... codeUnits)
	{
		target.append("[");

		for (int codeUnit : codeUnits)
			target.append((char) codeUnit);

		target.append("]");
	}

	/**
	 * remove diacritics from input word
	 *
	 * Every character contained in one of the character classes is replaced
	 * by the corresponding lowercase ASCII letter; all other characters are
	 * kept as they are.
	 *
	 * @param word string for which diacritics are discarded (must not be null)
	 * @return word without diacritics
	 */
	public static String removeDiacritics(String word)
	{
		// The classes are re-read from the public builders on each call on
		// purpose: they are mutable and may have been extended by a caller.
		word = word.replaceAll(aString.toString(), "a");
		word = word.replaceAll(eString.toString(), "e");
		word = word.replaceAll(iString.toString(), "i");
		word = word.replaceAll(oString.toString(), "o");
		word = word.replaceAll(uString.toString(), "u");
		word = word.replaceAll(tString.toString(), "t");
		word = word.replaceAll(sString.toString(), "s");
		word = word.replaceAll(gString.toString(), "g");
		word = word.replaceAll(nString.toString(), "n");
		word = word.replaceAll(cString.toString(), "c");
		word = word.replaceAll(rString.toString(), "r");
		word = word.replaceAll(yString.toString(), "y");
		word = word.replaceAll(zString.toString(), "z");

		return word;
	}
	
	/**
	 * remove diacritics for all words in input file
	 *
	 * Both files are handled as UTF-8. Each input line is written followed by
	 * a single "\n", whatever line terminator the input used. Both streams
	 * are closed even when reading or writing fails.
	 *
	 * @param inputPath file for which diacritics should be discarded
	 * @param outputPath file containing input words without diacritics (overwritten if it exists)
	 * @throws IOException if a file cannot be opened, read, written or closed
	 */
	public static void removeDiacriticsFromTxtFile(String inputPath, String outputPath) throws IOException
	{
		BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(new File(inputPath)), UTF8));

		try
		{
			BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(outputPath)), UTF8));

			try
			{
				String word;

				while ((word = in.readLine()) != null)
				{
					out.write(removeDiacritics(word) + "\n");
				}
			}
			finally
			{
				// close() also flushes the buffered output
				out.close();
			}
		}
		finally
		{
			in.close();
		}
	}
	
	/**
	 * read input file and return a list of lines in input file
	 *
	 * The file is read as UTF-8. A read failure is reported on standard
	 * output (with a stack trace when {@link #verbose} is set) and is not
	 * propagated; the lines read before the failure are still returned.
	 *
	 * @param path input file
	 * @return list of lines in input file, empty if the file could not be opened
	 */
	public static List<String> getLines(String path) 
	{
		String line;
		List<String> lines = new ArrayList<String>();
		
		try
		{
			BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(new File(path)), UTF8));

			try
			{
				while ((line = in.readLine()) != null)
					lines.add(line);
			}
			finally
			{
				// release the file handle even if readLine() failed
				in.close();
			}
		}
		catch (IOException e)
		{
			System.out.println("Error while reading file '" + path + "'");
			
			if (verbose)
				e.printStackTrace();
		}
		
		return lines;
	}
}