import sys
from statistics import mean, median
from collections import Counter, defaultdict

from best.loaders import extract_mentions

# Directory handed to iter_best_files() below to enumerate the documents to
# check.  NOTE(review): looks like the English portion of an LDC2016E114
# release, relative to the working directory -- confirm.
new_path = 'data/ldc2016e114/data/eng'

from best.data_iterators import iter_best_files


# Alignment sanity check: for each entity mention of each document, rebuild
# the mention text from the tokens its character span maps to and print every
# case where the rebuilt text differs from the annotated mention text.
docs = list(iter_best_files(new_path))
for doc in docs:
    # Flatten the per-sentence rows into one document-wide token list.
    # Column 1 of each row is presumably the word form -- TODO confirm.
    tokens = []
    for sentence in doc.conll:
        tokens.extend(row[1] for row in sentence)

    for mention in doc.evaluator_ere.entity_mentions.values():
        # Map the mention's character span to indices into the flat token
        # list; the pair is used as slice bounds below.
        # (A sentence-relative variant based on doc.offset_to_tokens and
        # doc.parsed_span was tried here before and is currently disabled.)
        start, stop = doc.offset_to_flat_tokens(mention.offset,
                                                mention.length)
        if start == stop:
            # Identical bounds: nothing to compare for this mention.
            continue

        expected = mention.mention_text
        rebuilt = " ".join(tokens[start:stop])
        if rebuilt != expected:
            # Report the annotated text and the token-derived text, framed
            # by "===" lines and followed by one blank line.
            print("===", expected, '--', rebuilt, "===", sep="\n")
            print()
