package szte.csd.baseline;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import szte.nlputils.Util;

import szte.csd.Sentence;

/**
 * 
 * The "content shifted sentence baseline".
 * It looks for content-shifting phrases; if a sentence contains any of them, the whole sentence is regarded as "altered".
 * The phrases were collected from Chapman et al. (2007); Light et al. (2004); Vincze et al. (2008); Farkas and Szarvas (2008) and Farkas et al. (2009).
 *
 */
public class SentenceRemover implements RuleBasedCSD {
  /**
   * Content-shifting phrases. Each entry is spliced verbatim into a regular
   * expression (it is NOT quoted), so regex metacharacters in an entry are
   * interpreted as regex syntax.
   */
  protected Set<String> externalmodifier;
  /** Text of the most recently evaluated sentence; null before the first evaluation. */
  protected String prev_sentence = null;
  /** Verdict that was computed for {@link #prev_sentence}. */
  protected boolean prev_result;
  /** Compiled patterns keyed by phrase, so that every phrase is compiled only once per instance. */
  private final Map<String, Pattern> patternCache = new HashMap<String, Pattern>();
  
  /**
   * Decides whether the sentence contains any content-shifting phrase.
   * A phrase counts as present when it occurs in the lower-cased sentence with
   * one of the characters space, '.', ',', ':', '(' or ')' directly before and
   * directly after it; the sentence is padded with one space on each side so
   * that a phrase at the very beginning or end can match as well.
   * The verdict for the last evaluated sentence text is remembered, so repeated
   * calls with the same text do not rescan the phrase list.
   *
   * @param sent the sentence to check; its original_sentence field is inspected
   * @param pos  not used by this implementation, the verdict is per sentence
   * @return true if at least one phrase was found, false otherwise
   */
  public boolean isAltered(Sentence sent, int pos) {
    String text = sent.original_sentence;
    if(text.equals(prev_sentence))
      return prev_result;

    // Built once per sentence; Locale.ROOT keeps the lower-casing independent
    // of the JVM default locale (e.g. Turkish dotless i).
    String padded = " " + text.toLowerCase(Locale.ROOT) + " ";
    boolean altered = false;
    for(String em : externalmodifier)
    {
      if(patternFor(em).matcher(padded).find())
      {
        altered = true;
        break;
      }
    }

    // The cache is updated only after the scan has finished, so an exception
    // thrown while compiling a phrase cannot leave a stale verdict behind.
    prev_sentence = text;
    prev_result = altered;
    return altered;
  }
  
  /**
   * Returns the delimiter-bounded pattern for the given phrase, compiling it on first use.
   */
  private Pattern patternFor(String phrase) {
    Pattern compiled = patternCache.get(phrase);
    if(compiled == null)
    {
      compiled = Pattern.compile("[ .,:()]"+phrase+"[ .,:()]");
      patternCache.put(phrase, compiled);
    }
    return compiled;
  }
  
  /**
   * Loads the phrase dictionaries through Util.readFileToSet.
   * "dict/modifier_external" is always loaded; "dict/modifier_cmc" is added
   * unless the task is "cmc"; "dict/modifier_obes" is added unless the task is
   * "cmc" or "obes".
   * NOTE(review): the reason a task skips its own dictionary is not visible
   * here - confirm against the experiment setup before changing it.
   *
   * @param task task identifier; must not be null
   */
  public SentenceRemover(String task){
    boolean isCmc = task.equals("cmc");
    boolean isObes = task.equals("obes");
    externalmodifier = Util.readFileToSet("dict/modifier_external");
    if(!isCmc)
      externalmodifier.addAll(Util.readFileToSet("dict/modifier_cmc"));
    if(!isCmc && !isObes)
      externalmodifier.addAll(Util.readFileToSet("dict/modifier_obes"));
  }
}
