
/*******************************************************************/
/*                                                                 */
/*     File: wordclass.C                                           */
/*   Author: Helmut Schmid                                         */
/*  Purpose: Automaton mapping strings to word classes             */
/*  Created: Fri Jul 22 16:49:06 2005                              */
/* Modified: Thu Jan 15 09:28:04 2009 (schmid)                     */
/*                                                                 */
/*******************************************************************/

#include <string.h>
#include <stdlib.h>

#include "wordclass.h"


/*******************************************************************/
/*                                                                 */
/*  Automaton::Automaton                                           */
/*                                                                 */
/*******************************************************************/

// Builds the automaton from a text file consisting of whitespace-separated
// records "<state> <label> <value>":
//   - label "wordclass":  <value> is the word class assigned to <state>
//   - single character:   a transition from <state> to state <value>
//     on that character
// Transition records must be ordered by source state and, within one
// state, by the unsigned value of the character; wordclass() relies on
// that order for its binary search.  Reading stops at the first record
// that cannot be parsed (normally end of file).
// number_of_classes ends up as the largest word class seen plus one.
// Malformed input is reported on stderr and terminates the program.
Automaton::Automaton( FILE *file )

{
  int s, t;
  char label[100];
  char lastchar=0;
  int laststate=-1;

  number_of_classes = 0;
  // "%99s": the field width does not count the terminating NUL, so 99 is
  // the largest width that is safe for label[100]
  while (fscanf(file,"%d%99s%d", &s, label, &t) == 3) {
    if (s < 0) {
      fprintf(stderr,"Error: in transducer file: illegal state number %d!", s);
      exit(1);
    }

    if (strcmp(label,"wordclass") == 0) {
      // the state may not have been created by a transition record yet
      while (state.size() <= static_cast<size_t>(s))
        state.push_back(State());
      state[s].wordclass = t;
      if (number_of_classes <= t)
        number_of_classes = t+1;
      continue;
    }

    if (label[1] != 0) {
      fprintf(stderr,"Error: in transducer file: illegal symbol \"%s\"!",
              label);
      exit(1);
    }
    if (t < 0) {
      fprintf(stderr,"Error: in transducer file: illegal target state %d!", t);
      exit(1);
    }
    if (laststate > s) {
      fprintf(stderr,"Error: in transducer file: states not ordered (%d,%d)!",
              laststate, s);
      exit(1);
    }
    if (laststate < s) {
      // first transition of a new source state: restart the symbol order check
      laststate = s;
      lastchar = 0;
    }

    // create the source state and the target state if they do not exist yet,
    // so that every stored target is a valid index into "state"
    const int top = (t > s) ? t : s;
    while (state.size() <= static_cast<size_t>(top))
      state.push_back(State());

    if (static_cast<unsigned char>(lastchar) >
        static_cast<unsigned char>(label[0])) {
      fprintf(stderr,"Error: in transducer file: symbols not ordered (%c,%c) at state %d!", lastchar, label[0], s);
      exit(1);
    }
    state[s].transition.push_back(Transition(label[0], t));
    lastchar = label[0];
  }
}


/*******************************************************************/
/*                                                                 */
/*  wordclass                                                      */
/*                                                                 */
/*******************************************************************/

int Automaton::wordclass( const char *string, int sn )

{
  if (string[0] == 0)
    return state[sn].wordclass;
  else {
    vector<Transition> &t=state[sn].transition;
    size_t l=0;
    size_t r=t.size();
    char c=string[0];
    while (l < r) {
      size_t m=(l+r)/2;
      if ((unsigned char)t[m].character < (unsigned char)c)
	l = m+1;
      else
	r = m;
    }

    if (l < t.size() && t[l].character == c)
      return wordclass( string+1, t[l].target);
    return 0; // default wordclass;
  }
}
