"""Main / General procedure to read the annotation from a table-id.xlsx file."""

import os
import argparse
from openpyxl import load_workbook

from processing import _OPERATORS
from processing.annotation.block import get_annotated_blocks
from processing.annotation.subblock import get_sub_samples


def load_workbooks(filename):
    """Open the 'labeling' sheet of an annotation file in two flavours.

    The file is loaded twice with openpyxl: once with the default settings
    and once with ``data_only=True`` (per the openpyxl documentation, formula
    cells then yield their stored values instead of the formula itself).

    Args:
        filename: path of the .xlsx annotation file.
    Return:
        (worksheet, worksheet_literal): the 'labeling' sheet from the default
        load and from the ``data_only=True`` load, in that order.
    """
    sheet_name = 'labeling'
    default_sheet = load_workbook(filename)[sheet_name]
    literal_sheet = load_workbook(filename, data_only=True)[sheet_name]
    return default_sheet, literal_sheet


def read_table_anno(filename, operator=None):
    """Extract from a .xlsx annotation multiple samples.
    step 1. validate the requested operator (before any file I/O)
    step 2. load workbook-raw and workbook-literal
    step 3. split rows into sentence blocks
    step 4. parse the annotation of each block

    Args:
        filename: f'{table_id}.xlsx'
        operator: one of the _OPERATORS, or None. None skips the validation
            and is forwarded unchanged to get_sub_samples.
    Return:
        block_samples: list holding the get_sub_samples result of every
            annotated block, in block order. Empty when `operator` is not
            supported (a notice is printed and the file is skipped).
    """
    # Reject an unsupported operator up front: the workbook would otherwise
    # be loaded twice and scanned for blocks only to be thrown away.
    if operator is not None and operator not in _OPERATORS:
        print(f'OP [{operator}] not supported, skip file [{filename}]...')
        return []

    worksheet, worksheet_literal = load_workbooks(filename)
    return [
        get_sub_samples(block_start, worksheet, worksheet_literal, operator)
        for block_start in get_annotated_blocks(worksheet)
    ]



# %% main

def main(args):
    """Parse every annotation file that has a matching valid table.

    An annotation file is kept when '<table_id>.json' exists in
    args.valid_table_path, where <table_id> is the annotation file name up to
    its first '.'.

    Args:
        args: namespace providing the directories `annotated_path` and
            `valid_table_path`, and optionally `operator`, which is forwarded
            to read_table_anno (treated as None when the attribute is absent).
    Return:
        dataset: dict mapping each table_id to the value returned by
            read_table_anno for its annotation file.
    """
    # Select Valid and Annotated Tables
    annotated_files = os.listdir(args.annotated_path)
    # A set makes each membership test O(1) instead of a scan of the listing.
    valid_table_files = set(os.listdir(args.valid_table_path))

    # The CLI defines --operator; forward it instead of silently dropping it.
    # getattr keeps callers that build `args` without that attribute working.
    operator = getattr(args, 'operator', None)

    # Iterate and Parse the Annotated Files
    dataset = {}
    for anno_file in annotated_files:
        table_id = anno_file.split('.')[0]
        if f"{table_id}.json" not in valid_table_files:
            continue
        dataset[table_id] = read_table_anno(
            filename=os.path.join(args.annotated_path, anno_file),
            operator=operator,
        )

    return dataset


if __name__ == "__main__":
    # Command-line entry point: every option is a string flag with a default.
    cli = argparse.ArgumentParser()
    for flag, default in (
        ('--root_dir', '/mnt/USER/HMT-QA'),
        ('--data_dir', 'DATA'),
        ('--annotated_dir', 'annotations'),
        ('--valid_table_dir', 'table_filtered'),
        ('--operator', None),
    ):
        cli.add_argument(flag, type=str, default=default)
    args = cli.parse_args()

    # Both input directories live under <root_dir>/<data_dir>.
    data_root = os.path.join(args.root_dir, args.data_dir)
    args.annotated_path = os.path.join(data_root, args.annotated_dir)
    args.valid_table_path = os.path.join(data_root, args.valid_table_dir)

    main(args)
