from collections import OrderedDict
class IndexedFeature:
    """Sparse feature container backed by a plain dict.

    Keys that were never added read back as ``0.0``.
    """

    def __init__(self):
        # Maps feature key -> stored value.
        self.data = dict()

    def add(self, k, v=1.0):
        """Store ``v`` under key ``k``, replacing any previous value."""
        self.data[k] = v

    def __getitem__(self, k):
        """Return the value stored for ``k``, or ``0.0`` if ``k`` is absent."""
        if k in self.data:
            return self.data[k]
        return 0.0

    def __str__(self):
        """Return the string form of the underlying dict."""
        return f"{self.data}"

    def __repr__(self):
        """Return the same text as ``str(self)``."""
        return str(self)

class FeatureVocab:
    """Two-way mapping between features and integer ids.

    ``add`` assigns each new feature the current vocabulary size as its id,
    so as long as entries are only created through ``add`` the ids in use
    are ``0 .. len(vocab) - 1`` in insertion order.
    """

    def __init__(self):
        self.feat_to_id = {}  # feature -> id
        self.id_to_feat = {}  # id -> feature

    def __getitem__(self, word):
        """Return the id of ``word``, or ``-1`` if it is not in the vocabulary."""
        return self.feat_to_id.get(word, -1)

    def __contains__(self, word):
        """Return True if ``word`` has been added."""
        return word in self.feat_to_id

    def __len__(self):
        """Return the number of distinct features."""
        return len(self.feat_to_id)

    def size(self):
        """Return the number of distinct features (same as ``len(self)``)."""
        return len(self)

    def get_word(self, wid):
        """Return the feature stored under id ``wid``.

        Raises:
            KeyError: if ``wid`` has not been assigned.
        """
        return self.id_to_feat[wid]

    def get_names(self):
        """Return all features as a list ordered by id."""
        return [self.id_to_feat[i] for i in range(len(self))]

    def add(self, word):
        """Register ``word`` if needed and return its id.

        A feature that is already present is left untouched and its existing
        id is returned, so the result is always a valid id (never ``None``).
        """
        if word in self:
            return self.feat_to_id[word]
        wid = self.feat_to_id[word] = len(self)
        self.id_to_feat[wid] = word
        return wid

def normalize_raw_prediction(preds):
    """Renormalise the 'yes' probability against 'yes' + 'no' for each entry.

    Args:
        preds: mapping from an id to an iterable of dicts. Each dict must
            have a ``'text'`` key; ``'probability'`` is read from the dicts
            whose text is ``'yes'`` or ``'no'``. If a label occurs more than
            once, the last occurrence wins.

    Returns:
        An ``OrderedDict`` mapping each id (in the iteration order of
        ``preds``) to ``yes / (yes + no)``.

    Raises:
        RuntimeError: if, for some id, the 'yes' or the 'no' probability is
            absent or equal to 0.0.
    """
    normalized = OrderedDict()
    for qid in preds:
        p_yes = p_no = 0.0
        for candidate in preds[qid]:
            label = candidate['text']
            if label == 'yes':
                p_yes = candidate['probability']
            elif label == 'no':
                p_no = candidate['probability']
        # 0.0 doubles as the "not seen" marker, so a genuine zero
        # probability is rejected here as well.
        if 0.0 in (p_yes, p_no):
            raise RuntimeError(' missing probability')
        normalized[qid] = p_yes / (p_yes + p_no)
    return normalized

def prepro_data(raw_data):
    """Flatten a nested QA dataset into one record per title.

    Only the first paragraph of each entry, the first question of that
    paragraph, and the first answer of that question are used.

    Args:
        raw_data: dict with a ``'data'`` list. Each item has a ``'title'``
            and a ``'paragraphs'`` list whose items have ``'context'`` and a
            ``'qas'`` list; each qa has ``'question'`` and an ``'answers'``
            list of dicts with ``'text'``.

    Returns:
        An ``OrderedDict`` keyed by title, in input order, whose values are
        dicts with keys ``'id'``, ``'question'``, ``'context'`` and ``'gt'``.
        A later entry with the same title replaces the earlier one.
    """
    records = OrderedDict()
    for entry in raw_data['data']:
        title = entry['title']
        first_par = entry['paragraphs'][0]
        first_qa = first_par['qas'][0]
        passage = first_par['context']
        gold = first_qa['answers'][0]['text']
        query = first_qa['question']

        records[title] = dict(id=title, question=query, context=passage, gt=gold)
    return records
