package szte.csd.baseline;

import java.util.HashSet;
import java.util.Set;

import szte.nlputils.Util;

import szte.csd.Sentence;

/**
 * 
 * This fine-tuned, hand-crafted rule set was developed for the CMC clinical NLP challenge in 2007.
 *
 */
public class CMCRuleBased implements RuleBasedCSD {
  /**
   * Cue words loaded from {@code dict/CMC_RuleBased/conditionWords}; a token found in this
   * set switches {@link #getStatement(String)} into skipping mode.
   */
  protected static Set<String> conditionWords = new HashSet<String>();

  /**
   * Cue words loaded from {@code dict/CMC_RuleBased/negationWords}; a token found in this
   * set switches {@link #getStatement(String)} into skipping mode.
   */
  protected static Set<String> negationWords = new HashSet<String>();

  /**
   * Loads both cue-word dictionaries through {@code Util.readFileToSet}.
   *
   * <p>The dictionaries are stored in static fields, so every construction replaces the
   * sets seen by all instances of this class.
   * NOTE(review): how the relative dictionary paths are resolved depends on
   * {@code Util.readFileToSet} - confirm against that helper.
   */
  public CMCRuleBased(){
    negationWords = Util.readFileToSet("dict/CMC_RuleBased/negationWords");
    conditionWords= Util.readFileToSet("dict/CMC_RuleBased/conditionWords");
  }

  /**
   * Tells whether the token at {@code pos} is missing from the text that survives the
   * filtering done by {@link #getStatement(String)}.
   *
   * <p>The check is a case-insensitive substring search on the filtered text, not a
   * token-level comparison.
   *
   * @param sent sentence whose {@code original_sentence} is filtered and whose
   *             {@code tokens} list supplies the token to look for
   * @param pos  index into {@code sent.tokens}
   * @return {@code true} if the lowercased token does not occur in the filtered text
   */
  public boolean isAltered(Sentence sent, int pos) {
    String statement = getStatement(sent.original_sentence).toLowerCase();
    return !statement.contains(sent.tokens.get(pos).toLowerCase());
  }

  /**
   * Builds a lowercased copy of {@code doc} from which the parts governed by a cue word
   * have been dropped.
   *
   * <p>The text is split on single spaces and scanned left to right:
   * <ul>
   *   <li>On "or", "can", "cannot" or "vs" the sentence collected so far is cut back to
   *       just before its last comma (or emptied when it has none), the scan jumps ahead
   *       to the next token containing "." or ",", and skipping mode is switched on.</li>
   *   <li>A token found in the negation or condition word sets switches skipping mode
   *       on.</li>
   *   <li>A token ending with "." closes the current sentence: what was collected is
   *       appended to the result (including the closing token only when not skipping)
   *       and skipping mode is switched off.</li>
   *   <li>Any other token is collected unless skipping mode is on.</li>
   * </ul>
   * Tokens after the last token ending with "." are never appended to the result.
   *
   * @param doc text to filter
   * @return the kept tokens, each preceded by a single space
   */
  protected String getStatement(String doc)
  {
    String[] tokens = doc.toLowerCase().split(" ");
    StringBuilder res = new StringBuilder();
    String sentence = "";
    boolean out = false; // true while tokens are being skipped
    for (int i = 0; i < tokens.length; ++i)
    {
      if (isSkipTrigger(tokens[i])) {
        // Drop the clause that leads up to the trigger word (back to the last comma).
        sentence = sentence.substring(0, Math.max(sentence.lastIndexOf(","), 0));
        // Jump to the token that closes the current clause. The bound on i prevents an
        // ArrayIndexOutOfBoundsException when no such token follows the trigger word.
        while (i < tokens.length && !hasClauseDelimiter(tokens[i])) {
          i++;
        }
        out = true;
        if (i >= tokens.length) {
          // Input ended inside the skipped clause; nothing more to process.
          break;
        }
      }
      if (negationWords.contains(tokens[i].toLowerCase()) || conditionWords.contains(tokens[i].toLowerCase())) {
        out = true;
      }
      else if (out && tokens[i].endsWith(".")) {
        // End of a sentence that was being skipped: keep only what was collected before
        // skipping started, and resume collecting with the next sentence.
        out = false;
        res.append(" ").append(sentence);
        sentence = "";
      }
      else if (!out && tokens[i].endsWith(".")) {
        // End of a fully kept sentence: include the closing token as well.
        sentence += " " + tokens[i];
        res.append(" ").append(sentence);
        sentence = "";
      }
      else if (!out) {
        sentence += " " + tokens[i];
      }
    }
    return res.toString();
  }

  /** Returns true for the words that make getStatement drop the surrounding clause. */
  private static boolean isSkipTrigger(String token) {
    return token.equalsIgnoreCase("or") || token.equalsIgnoreCase("can")
        || token.equalsIgnoreCase("cannot") || token.equalsIgnoreCase("vs");
  }

  /** Returns true if the token contains a "." or a ",", which ends a skipped clause. */
  private static boolean hasClauseDelimiter(String token) {
    return token.indexOf(".") > -1 || token.indexOf(",") > -1;
  }
}
