package szte.nlputils;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for reading text files and for simple string measurements
 * used in Hungarian language processing.
 */
public class Util {
  /**
   * Matches Hungarian multi-character letters. The trigraph {@code dzs} is
   * listed first: it begins with {@code dz}, so if the digraph alternative
   * came first the trigraph could never match.
   */
  private static final Pattern HUNGARIAN_MULTIGRAPH =
      Pattern.compile("(dzs|[glnt]y|[ds]z|[cz]s)", Pattern.CASE_INSENSITIVE);

  /**
   * Reads a whole file into a string using the platform default charset.
   *
   * @param filename path of the file to read
   * @return the file content, or an empty string if the file cannot be read
   */
  public static String readFileToString(String filename){
    // name() is the canonical charset name; displayName() is a human-readable
    // label that is not guaranteed to be accepted as a charset name.
    return readFileToString(filename, Charset.defaultCharset().name());
  }

  /**
   * Reads a whole file into a string using the given character encoding.
   *
   * <p>I/O problems (missing file, unsupported encoding name, read failure)
   * are reported on {@code System.err} and result in an empty string.
   *
   * @param filePath  path of the file to read
   * @param cEncoding name of the charset used to decode the file
   * @return the file content, or an empty string if the file cannot be read
   */
  public static String readFileToString(String filePath, String cEncoding){
    StringBuilder fileData = new StringBuilder(1000);
    try {
      FileInputStream stream = new FileInputStream(filePath);
      try {
        BufferedReader reader = new BufferedReader(new InputStreamReader(stream, cEncoding));
        char[] buf = new char[1024];
        int numRead;
        while ((numRead = reader.read(buf)) != -1) {
          fileData.append(buf, 0, numRead);
        }
      } finally {
        // Closing the underlying stream releases the file handle even when
        // the reader could not be created or a read failed half-way.
        stream.close();
      }
    } catch (IOException e) {
      System.err.println("Problem with file: " + filePath);
      return "";
    }
    return fileData.toString();
  }

  /**
   * Reads a file line by line into a set, trimming each line. The file is
   * decoded with the platform default charset.
   *
   * <p>I/O problems are reported on {@code System.err} and result in an empty
   * set; lines read before the failure are discarded.
   *
   * @param filename path of the file to read
   * @return the distinct trimmed lines, or an empty set if the file cannot be read
   */
  public static Set<String> readFileToSet(String filename) {
    Set<String> set = new HashSet<String>();
    try {
      BufferedReader file = new BufferedReader(new FileReader(filename));
      try {
        String line;
        while ((line = file.readLine()) != null) {
          set.add(line.trim());
        }
      } finally {
        // Always release the file handle, also when readLine() throws.
        file.close();
      }
    } catch (IOException e) {
      System.err.println("Problem with file: " + filename);
      return new HashSet<String>();
    }
    return set;
  }

  /**
   * Returns {@code str} without its last {@code n} characters.
   *
   * @param str the source string
   * @param n   number of trailing characters to drop
   * @return the leading {@code str.length() - n} characters of {@code str}
   * @throws StringIndexOutOfBoundsException if {@code n} is negative or larger
   *         than the length of {@code str}
   */
  public static String starterSubstring(String str, int n) {
    return str.substring(0, str.length() - n);
  }

  /**
   * Computes the length of a token counted in Hungarian letters: every
   * occurrence of a digraph ({@code gy ly ny ty dz sz cs zs}) or of the
   * trigraph {@code dzs} counts as a single letter. Matching is
   * case-insensitive and scans left to right without overlapping.
   *
   * @param possibleLemma the token to measure
   * @return the character count of the token minus the surplus characters of
   *         each multi-character letter found
   */
  public static int hungarianTokenLength(String possibleLemma) {
    int toSubtract = 0;
    Matcher m = HUNGARIAN_MULTIGRAPH.matcher(possibleLemma);
    while (m.find()) {
      // A k-character letter contributes k - 1 surplus characters.
      toSubtract += m.end() - m.start() - 1;
    }
    return possibleLemma.length() - toSubtract;
  }
}
