package szte.csd.baseline;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.Map;

import szte.csd.Sentence;

/**
 * 
 * This class uses the manually annotated BioScope corpus as a rule-based CSD (it checks whether the token in question is under a scope).
 * It works only for the CMC dataset, as that dataset is included in BioScope.
 * You can download BioScope at http://www.inf.u-szeged.hu/rgai/bioscope
 * You have to convert XML to TXT with <> scope bracketing.
 * 
 */
public class Bioscope implements RuleBasedCSD {
  /**
   * Lookup table filled by the constructor.
   * Key: the first tab-separated field of a corpus line reduced to its word characters.
   * Value: that same field with every parenthesised span (and the parentheses) removed.
   */
  protected Map<String, String> sentences;
  /** Shared log of sentences for which {@link #find(String)} found nothing; reopened by each constructor call. */
  protected static PrintWriter log;

  /**
   * Loads <code>bioscope_clinical.txt</code> from the working directory into {@link #sentences}
   * and (re)opens <code>bioscope.log</code>.
   * I/O failures are reported on stderr and leave the table empty or partially filled
   * instead of aborting construction.
   */
  public Bioscope(){
    // Initialise before any I/O so find() never sees a null map when loading fails.
    sentences = new HashMap<String, String>();
    String bs = "bioscope_clinical.txt";
    try{
      // Release the writer left behind by a previous instance before truncating the log again.
      if(log != null)
        log.close();
      log = new PrintWriter("bioscope.log");
      BufferedReader file = new BufferedReader(new FileReader(bs));
      try{
        String line;
        while ((line = file.readLine()) != null)
        {
          if(line.length()<3) continue;
          // Drop the 5-character "TI  -" / "AB  -" field marker.
          if(line.startsWith("TI  -") || line.startsWith("AB  -"))
          {
            line = line.substring(5).trim();
          }
          // Literal replacement: these are plain strings, not regular expressions.
          line = line.replace("&lt;", "<");
          line = line.replace("&gt;", ">");
          line = line.replace("&apos;", "'");
          line = line.replace("&quot;", "\"");
          // Only the text before the first tab is used. indexOf (unlike split("\t")[0])
          // cannot fail on a line made up solely of tabs.
          int tab = line.indexOf('\t');
          String s = tab < 0 ? line : line.substring(0, tab);
          String key = normalize(s);
          // An empty key would be a prefix of every query in find(); skip such lines.
          if(key.isEmpty()) continue;
          sentences.put(key, stripParenthesised(s));
        }
      } finally {
        file.close();
      }
    } catch (IOException e) {
      // Best effort: report the problem and keep whatever was loaded so far.
      e.printStackTrace();
    }
  }

  /**
   * Looks up the stored statement for a sentence.
   * The sentence is reduced to its word characters and matched against the table keys:
   * an exact match wins; otherwise the first key that starts or ends with the query is returned,
   * and keys that are themselves a prefix or suffix of the query are concatenated.
   * For partial matches the result depends on the iteration order of the underlying map.
   *
   * @param ss the sentence text to look up
   * @return the matching statement text, or the empty string when nothing matches
   */
  public String find(String ss){
    String s = normalize(ss);
    // A query without any word character would be a prefix of every key; treat it as "not found".
    if(s.isEmpty())
      return "";
    String exact = sentences.get(s);
    if(exact != null)
      return exact;
    StringBuilder state = new StringBuilder();
    for(Map.Entry<String, String> entry : sentences.entrySet())
    {
      String sent = entry.getKey();
      if(s.startsWith(sent) || s.endsWith(sent))
        state.append(entry.getValue());
      else if(sent.endsWith(s) || sent.startsWith(s))
        return entry.getValue();
    }
    return state.toString();
  }

  /**
   * Decides whether the token at <code>pos</code> is altered, i.e. is absent from the
   * lower-cased statement text (the text outside parentheses) found for the sentence.
   * Sentences without any match are written to the log and reported as not altered.
   * NOTE(review): only the statement is lower-cased, so this presumably expects
   * lower-case tokens - confirm against the tokenizer.
   *
   * @param sent the sentence holding the original text and its tokens
   * @param pos index of the token to check
   * @return true when the statement text does not contain the token
   */
  public boolean isAltered(Sentence sent, int pos) {
    String state = find(sent.original_sentence);
    // Compare by content; == on strings only tests reference identity.
    if(state.isEmpty())
    {
      // log stays null when the log file could not be opened.
      if(log != null)
      {
        log.println("&"+sent.original_sentence+"&");
        // The writer is never closed explicitly, so flush to avoid losing entries.
        log.flush();
      }
      return false;
    }
    return !state.toLowerCase().contains(sent.tokens.get(pos));
  }

  /** Reduces text to its word characters (letters, digits, underscore), the form used for table keys. */
  private static String normalize(String text){
    return text.replaceAll("\\W+", "");
  }

  /** Returns the text with every character at a non-zero parenthesis depth, and the parentheses, removed. */
  private static String stripParenthesised(String text){
    StringBuilder statement = new StringBuilder(text.length());
    int d = 0;
    for(int i=0;i<text.length();++i)
    {
      char c = text.charAt(i);
      if(c == '(')
        d++;
      else if(c == ')')
        d--;
      else if(d == 0)
        statement.append(c);
    }
    return statement.toString();
  }

}
