'''
Convert the raw pickled dataset splits into a uniform tab-separated text
format for the common reader.
'''

import pickle
import os
from tqdm import tqdm

def build_label_ids(records):
    """Map every profession found in *records* to an integer id.

    Labels are sorted before numbering so the mapping is the same on every
    run; numbering straight out of a ``set`` follows string-hash order, which
    can differ from one interpreter process to the next.

    Args:
        records: iterable of dicts carrying a profession label under ``'p'``.
            Assumes the labels are mutually orderable (e.g. all strings).

    Returns:
        dict mapping each distinct label to an id in ``range(n_labels)``.
    """
    professions = sorted({record['p'] for record in records})
    return {profession: index for index, profession in enumerate(professions)}


def text_length_stats(texts):
    """Return ``(longest, shortest, mean)`` of ``len(text)`` over *texts*.

    An empty collection yields ``(0, 0, 0.0)`` instead of raising.
    """
    lengths = [len(text) for text in texts]
    if not lengths:
        return 0, 0, 0.0
    return max(lengths), min(lengths), sum(lengths) / len(lengths)


def format_rows(records, job_label_to_id, protected_label_to_id):
    """Build one ``text<TAB>job id<TAB>protected id`` line per record.

    Args:
        records: iterable of dicts with keys ``'hard_text'``, ``'p'``
            (profession) and ``'g'`` (protected attribute).
        job_label_to_id: mapping from ``record['p']`` to an integer id.
        protected_label_to_id: mapping from ``record['g']`` to an integer id.

    Returns:
        list of newline-terminated strings, in the order of *records*.

    Raises:
        KeyError: if a record carries a label missing from either mapping.
    """
    rows = []
    for record in records:
        # Every element of 'hard_text' is stringified and concatenated with
        # no separator; for a plain string this reproduces it unchanged.
        # NOTE(review): tabs/newlines inside the text are not escaped and
        # would break the one-record-per-line layout -- confirm the raw data.
        text = ''.join(str(piece) for piece in record['hard_text'])
        rows.append('{}\t{}\t{}\n'.format(
            text, job_label_to_id[record['p']], protected_label_to_id[record['g']]))
    return rows


if __name__ == '__main__':
    path = 'biais_bios'
    loading_path = os.path.join(path, 'raw')
    saving_path = os.path.join(path, 'bio_processed')
    os.makedirs(saving_path, exist_ok=True)

    # WARNING: pickle.load can execute arbitrary code while deserialising;
    # only run this script on pickle files from a trusted source.
    with open(os.path.join(loading_path, 'dev.pickle'), "rb") as f:
        # The job label vocabulary is built from the dev split only; a
        # profession that appears solely in another split raises KeyError
        # when that split is formatted below.
        job_label_to_id = build_label_ids(pickle.load(f))
    print('Number of labels', len(job_label_to_id))
    protected_label_to_id = {'m': 1, 'f': 0}

    for split in ['dev', 'train', 'test']:
        with open(os.path.join(loading_path, '{}.pickle'.format(split)), "rb") as f:
            datas = pickle.load(f)

        # tqdm shows a progress bar labelled with the split name.
        lines = [data['hard_text'] for data in tqdm(datas, split)]
        longest, shortest, mean = text_length_stats(lines)
        print('Max', longest)
        print('Min', shortest)
        print('Mean', mean)
        print('Opening', split)

        rows = format_rows(datas, job_label_to_id, protected_label_to_id)
        with open(os.path.join(saving_path, '{}.txt'.format(split)), "w") as file:
            file.write('text\tmain attribute\tprivate attribute\n')
            file.writelines(rows)
