import jieba


def get_clear_text(text):
    """Return *text* with all whitespace removed.

    Splitting with no separator discards leading, trailing and repeated
    whitespace, so re-joining the pieces with an empty string yields the
    text without any whitespace characters.
    """
    pieces = text.split()
    return ''.join(pieces)


def safe_divide(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero.

    Keeps the metric computations from raising ZeroDivisionError when the
    result file yields no usable records.
    """
    return numerator / denominator if denominator else 0.0


def parse_result_lines(lines, model):
    """Convert tab-separated result lines into aspect records.

    Args:
        lines: Iterable of raw text lines from the result file.
        model: 'enum' or 'mrc'; selects the expected column layout.

    Returns:
        A list of lists in which index 1 holds the predicted aspect and
        index 2 the gold aspect. 'enum' records are
        [field 0, field 2, field 1] and are built only from lines with
        exactly three fields. 'mrc' records are
        [field 3, field 2, field 1, field 0]; lines with fewer than four
        fields (e.g. blank lines) are skipped instead of raising IndexError.

    Raises:
        ValueError: If *model* is neither 'enum' nor 'mrc'.
    """
    if model not in ('enum', 'mrc'):
        raise ValueError("model must be 'enum' or 'mrc', got " + repr(model))

    records = []
    for line in lines:
        # Split once per line instead of once per extracted field.
        fields = line.strip().split('\t')
        if model == 'enum':
            if len(fields) == 3:
                records.append([fields[0], fields[2], fields[1]])
        elif len(fields) >= 4:
            # opinion, pred_aspect, gold_aspect, event
            records.append([fields[3], fields[2], fields[1], fields[0]])
    return records


if __name__ == '__main__':
    # Evaluate performance of Enum and MRC model on OTE.

    data_dir = r'../data/'  # Root data dir
    language = 'chinese'  # language: ['english', 'chinese']
    result_file = 'enumerate_2100.results'  # Result file
    model = 'enum'  # Model to evaluate: ['enum', 'mrc']

    try:
        with open(data_dir + result_file, 'r', encoding='utf-8') as result_fh:
            lines = result_fh.readlines()
    except FileNotFoundError:
        print('There is no ' + data_dir + result_file)
        # Nothing to evaluate; stop here instead of failing later with a
        # NameError on the never-assigned record list.
        raise SystemExit(1)

    aspect_opinions = parse_result_lines(lines, model)

    # Segment-level score: exact match of predicted vs. gold aspect after
    # removing all whitespace. Records whose gold aspect is the literal 'O'
    # are excluded (presumably the "no aspect" label -- confirm with the
    # data format).
    total_opinion = 0
    corr_opinion = 0
    for opinion in aspect_opinions:
        gold_text = opinion[2].strip()
        if gold_text == 'O':
            continue
        if get_clear_text(opinion[1]) == get_clear_text(gold_text):
            corr_opinion += 1
        total_opinion += 1
    print('Segment level Aspect Extraction: ',
          safe_divide(corr_opinion, total_opinion), corr_opinion, total_opinion)

    # Word-level overlap score: whitespace tokens for English, jieba
    # segmentation otherwise. Every predicted token that also occurs in the
    # gold tokens counts as overlap (repeated predicted tokens count each time).
    ground_word_num = 0
    pred_word_num = 0
    overlap_word_num = 0
    for opinion in aspect_opinions:
        if language == 'english':
            pred_aspect = opinion[1].strip().split()
            gold_aspect = opinion[2].strip().split()
        else:
            pred_aspect = list(jieba.cut(opinion[1]))
            gold_aspect = list(jieba.cut(opinion[2]))
        gold_tokens = set(gold_aspect)  # O(1) membership, same result as the list
        overlap_word_num += sum(1 for token in pred_aspect if token in gold_tokens)
        ground_word_num += len(gold_aspect)
        pred_word_num += len(pred_aspect)

    r = safe_divide(overlap_word_num, ground_word_num)
    p = safe_divide(overlap_word_num, pred_word_num)
    f1 = safe_divide(2 * r * p, r + p)
    print('Overlapping Aspect Extraction: ', p, r, f1)


