import os

from best.data_iterators import iter_best_files, iter_best_old_files
from itertools import chain

# Root directories handed to the corpus iterators in the tests below.
# Each one defaults to a path relative to the current working directory
# and can be overridden through an environment variable:
#   BEST_PATH      -> new_root
#   BEST_PATH_OLD  -> old_root
new_root = os.environ.get(
    'BEST_PATH',
    os.path.join('data', 'ldc2016e114', 'data', 'eng'))

old_root = os.environ.get(
    'BEST_PATH_OLD',
    os.path.join('data', 'ldc2016e27v2', 'data', 'eng'))


def test_iter_best_files():
    """Check that every loaded document carries non-empty content.

    Documents are read from ``new_root`` with ``iter_best_files`` and from
    ``old_root`` with ``iter_best_old_files``.  Each one must have a
    non-empty source, at least one ERE entity, and at least one sentiment
    and one belief in its BEST annotations.
    """
    new_documents = iter_best_files(new_root)
    old_documents = iter_best_old_files(old_root)

    for document in chain(new_documents, old_documents):
        assert len(document.source) > 0
        assert len(document.evaluator_ere.entities) > 0

        best_annotations = document.evaluator_best
        assert len(best_annotations.sentiments) > 0
        assert len(best_annotations.beliefs) > 0


def test_pairs():
    """Check the (source, target) entity pairs of every loaded document.

    For each document read from ``new_root`` and ``old_root``:

    * a pair whose source is not ``None`` must not target a mention that
      is listed in that source entity's own ``mentions``;
    * taken together, the pairs must use every entity key, plus ``None``,
      as a source and every entity-mention key as a target.
    """
    documents = chain(
        iter_best_files(new_root),
        iter_best_old_files(old_root))

    for document in documents:
        ere = document.evaluator_ere
        entities = ere.entities
        entity_mentions = ere.entity_mentions

        # Keys still waiting to show up in a pair.  ``None`` is expected
        # as a source in addition to every entity key.
        pending_sources = set(entities)
        pending_sources.add(None)
        pending_targets = set(entity_mentions)

        for source, target in document.pairs_entity:

            # The target must not be a mention of the source itself.
            if source is not None:
                target_mention = entity_mentions[target]
                assert target_mention not in entities[source].mentions

            pending_sources.discard(source)
            pending_targets.discard(target)

        # Nothing may be left over: every source and target was paired.
        assert not pending_sources
        assert not pending_targets
