package szte.csd.baseline;

import java.util.Set;
import java.util.Vector;

import szte.nlputils.Util;
import szte.csd.Sentence;

/**
 * Rule-based detector that labels every token of a sentence according to the scope of
 * dictionary-defined trigger words (negation, speculation/condition, family history).
 *
 * This fine-tuned hand-crafted rule-set was developed for the CMC clinical NLP challenge in 2007.
 *
 * <p>Token labels are {@code "O"} (outside every scope), {@code "FAMILY"}, {@code "SPEC"},
 * {@code "NEG"} and {@code "DEL"} (delimiter). The labels of the most recently processed
 * sentence are kept in {@link #statement}, so an instance holds per-call mutable state.
 */
public class ObesRuleBased implements RuleBasedCSD {
  /** Label of a token that lies outside every scope. */
  private static final String OUTSIDE = "O";
  /** Label of tokens inside the scope of a word from {@link #FD}. */
  private static final String FAMILY = "FAMILY";
  /** Label of tokens inside the scope of a condition word, or starting with {@code "?"}. */
  private static final String SPEC = "SPEC";
  /** Label of tokens inside the scope of a negation word. */
  private static final String NEG = "NEG";
  /** Label of a delimiter token that was not claimed by any scope. */
  private static final String DEL = "DEL";

  /**
   * Dictionaries, filled by {@link #init}: condition words (C), condition words acting in both
   * directions (CD), negation words (N), negation words acting in both directions (ND), words
   * opening a "FAMILY" scope (FD) and scope delimiters (Delimiter). {@code Delimiter == null}
   * is used as the "not initialised yet" marker.
   */
  protected Set<String> C, CD, N, ND, FD, Delimiter = null;

  /** Labels of the tokens of the last sentence passed to {@link #isAltered}, one per token. */
  protected Vector<String> statement;

  /**
   * Tells whether the token at {@code pos} received any label other than {@code "O"}.
   *
   * <p>The dictionaries are loaded from their default locations on first use unless
   * {@link #init} was called before. The whole sentence is re-labeled on every call.
   * NOTE(review): delimiter tokens are labeled {@code "DEL"} and therefore also count as
   * altered; confirm that callers only query non-delimiter positions.
   *
   * @param sent sentence whose {@code tokens} are labeled
   * @param pos index of the token of interest
   * @return {@code true} if the token at {@code pos} is not labeled {@code "O"}
   * @throws ArrayIndexOutOfBoundsException if {@code pos} is not a valid token index
   */
  public boolean isAltered(Sentence sent, int pos) {
    if (Delimiter == null) {
      init("dict/Obes_RuleBased/conditionWords",
           "dict/Obes_RuleBased/conditionWordsBothDirection",
           "dict/Obes_RuleBased/negationWords",
           "dict/Obes_RuleBased/negationWordsBothDirection",
           "dict/Obes_RuleBased/delimiters");
    }
    String[] tokens = sent.tokens.toArray(new String[0]);
    markScopes(tokens);
    return !statement.get(pos).equals(OUTSIDE);
  }

  /**
   * Loads the dictionaries through {@code Util.readFileToSet}.
   *
   * <p>The dictionary behind {@link #FD} is always read from the fixed location
   * {@code dict/Obes_RuleBased/unknown}; it cannot be overridden through the parameters.
   *
   * @param cond file of condition words
   * @param condDir file of condition words that act in both directions
   * @param neg file of negation words
   * @param negDir file of negation words that act in both directions
   * @param delimiter file of scope delimiters
   */
  public void init(String cond, String condDir, String neg, String negDir, String delimiter) {
    Delimiter = Util.readFileToSet(delimiter);
    C = Util.readFileToSet(cond);
    CD = Util.readFileToSet(condDir);
    N = Util.readFileToSet(neg);
    ND = Util.readFileToSet(negDir);
    FD = Util.readFileToSet("dict/Obes_RuleBased/unknown");
  }

  /**
   * Recomputes {@link #statement} for the given tokens.
   *
   * <p>Pass 1 opens a "FAMILY" scope at every {@link #FD} word. Pass 2 visits the tokens that
   * are still unlabeled: it marks tokens starting with {@code "?"} as "SPEC", delimiters as
   * "DEL", and opens a "SPEC" (condition words) or "NEG" (negation words) scope at trigger
   * words. Scopes extend forward up to the next delimiter; for the both-direction
   * dictionaries they also extend backward up to the previous delimiter. A scope never
   * overwrites a label assigned earlier.
   *
   * @param splits tokens of the sentence
   */
  private void markScopes(final String[] splits) {
    statement = new Vector<String>(splits.length);
    for (int i = 0; i < splits.length; i++) {
      statement.add(OUTSIDE);
    }

    // Pass 1: family scopes. Only "O" and "FAMILY" exist during this pass, so filling just
    // the unlabeled tokens yields the same labels as overwriting unconditionally.
    for (int i = 0; i < splits.length; i++) {
      if (isOutside(i) && FD.contains(splits[i])) {
        statement.set(i, FAMILY);
        markForward(splits, i + 1, FAMILY);
      }
    }

    // Pass 2: speculation and negation scopes over the tokens left unlabeled by pass 1.
    for (int i = 0; i < splits.length; i++) {
      if (!isOutside(i)) {
        continue;
      }
      final String token = splits[i];
      if (token.startsWith("?")) {
        statement.set(i, SPEC);
      }
      if (Delimiter.contains(token)) {
        statement.set(i, DEL);
      }

      final boolean speculation = C.contains(token) || CD.contains(token);
      final boolean negation = N.contains(token) || ND.contains(token);
      if (!speculation && !negation) {
        continue;
      }

      // A word listed as both condition and negation trigger is treated as speculation.
      final String tag = speculation ? SPEC : NEG;
      // The trigger label replaces a "SPEC"/"DEL" label given to this token just above.
      statement.set(i, tag);
      markForward(splits, i + 1, tag);
      if (ND.contains(token) || CD.contains(token)) {
        markBackward(splits, i - 1, tag);
      }
    }
  }

  /** Tells whether the token at {@code index} is still labeled {@code "O"}. */
  private boolean isOutside(int index) {
    return statement.get(index).equals(OUTSIDE);
  }

  /** Labels unlabeled tokens from {@code start} rightwards until a delimiter or the end. */
  private void markForward(String[] tokens, int start, String tag) {
    for (int j = start; j < tokens.length && !Delimiter.contains(tokens[j]); j++) {
      if (isOutside(j)) {
        statement.set(j, tag);
      }
    }
  }

  /**
   * Labels unlabeled tokens from {@code start} leftwards until a delimiter or the sentence
   * start. The scan includes index 0, so the first token of the sentence can be covered too.
   */
  private void markBackward(String[] tokens, int start, String tag) {
    for (int j = start; j >= 0 && !Delimiter.contains(tokens[j]); j--) {
      if (isOutside(j)) {
        statement.set(j, tag);
      }
    }
  }

}
