"""Perform statistical analysis of the dataset."""


from preprocess.annotation import _OPERATORS



def stats_operators(read_dict):
    """Print how often each known operator occurs in the annotations.

    Every name in ``_OPERATORS`` starts with a count of zero, so operators
    that never occur still show up in the report. The function then walks
    each value of ``read_dict``, its ``'sentences'``, each sentence's
    ``'sub_samples'``, and tallies every item found while iterating the
    sub-sample's ``'aggregation'`` entry.

    Args:
        read_dict: Mapping whose values each provide a ``'sentences'``
            iterable; the keys are not used (presumably table ids).

    Returns:
        None. The tally is written to stdout only.

    Raises:
        KeyError: If an aggregation item is not one of ``_OPERATORS``, or if
            one of the expected keys is missing from the nested dicts.
    """
    tally = dict.fromkeys(_OPERATORS, 0)

    for entry in read_dict.values():
        for sentence in entry['sentences']:
            for sample in sentence['sub_samples']:
                for operator in sample['aggregation']:
                    # Plain indexing on purpose: an operator outside
                    # _OPERATORS must fail loudly rather than be added.
                    tally[operator] = tally[operator] + 1

    print(f'Parsed Operators from Annotation Counts: {tally}')


def stats_answer_length(read_dict):
    """Print a histogram of answer-cell counts over all sub-samples.

    For every sub-sample reachable through
    ``read_dict[*]['sentences'][*]['sub_samples'][*]`` the length of its
    ``'answer_cells'`` entry is measured, and the number of sub-samples
    sharing each length is accumulated.

    Args:
        read_dict: Mapping whose values each provide a ``'sentences'``
            iterable; the keys are not used (presumably table ids).

    Returns:
        None. The histogram is written to stdout only, as a dict mapping
        ``answer length -> number of sub-samples``, with keys in the order
        each length was first encountered.

    Raises:
        KeyError: If one of the expected keys is missing from the nested
            dicts.
    """
    counts = {}
    # Only the per-table payloads matter here, so iterate the values directly
    # instead of unpacking keys and indices that are never read.
    for dual_dict in read_dict.values():
        for sent in dual_dict['sentences']:
            for subsent in sent['sub_samples']:
                answer_length = len(subsent['answer_cells'])
                # dict.get supplies the zero start for a length seen for the
                # first time, replacing the separate membership check.
                counts[answer_length] = counts.get(answer_length, 0) + 1
    print(f'Parsed AnswerNum from Annotation Counts: {counts}')

