import argparse
import logging
import os
import sys

from transformers import BertTokenizer

from few_shot_ner.dataset import MetaDataSet
from few_shot_ner.logger import init_root_logger


def main(args):
    """Create the alphabet files for a dataset and log where they were written.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``log_file``, ``verbose``, ``alphabets_folder``, ``bert_model_path``,
            ``data`` and ``max_token_length``. ``log_file`` is overwritten when
            the ``SM_OUTPUT_DATA_DIR`` environment variable is set.

    The process exits with status 1 when ``args.alphabets_folder`` already exists.
    """
    # When SM_OUTPUT_DATA_DIR is set, the log always goes to log.txt in that directory,
    # overriding whatever --log_file was given.
    output_dir = os.environ.get('SM_OUTPUT_DATA_DIR')
    if output_dir is not None:
        args.log_file = os.path.join(output_dir, 'log.txt')

    logger = init_root_logger(args.log_file, level=args.verbose)

    # Record the effective configuration, one option per log line.
    for option, setting in vars(args).items():
        logger.info("%s : %s", option, setting)

    # Refuse to touch an existing folder; the user has to delete it explicitly first.
    target_folder = args.alphabets_folder
    if os.path.exists(target_folder):
        logger.error("{} already exists, delete it to recreate the alphabets.".format(target_folder))
        sys.exit(1)

    # Load pre-trained BERT tokenizer
    bert_tokenizer = BertTokenizer.from_pretrained(args.bert_model_path)

    # The dataset is built only for its side effect: presumably MetaDataSet writes the
    # alphabets into the folder while loading (TODO confirm). The instance is not used afterwards.
    _dataset = MetaDataSet(
        target_folder,
        args.data,
        MetaDataSet.train,
        bert_tokenizer,
        args.max_token_length,
        {MetaDataSet.ner, MetaDataSet.ic},
    )
    logger.info("Alphabets wrote to : {}".format(target_folder))


if __name__ == '__main__':
    # Command-line entry point: declare the options, parse them and hand the namespace to main().
    # NOTE(review): the description reads like it belongs to a training script, while main() only
    # builds alphabets -- confirm whether it should be reworded.
    parser = argparse.ArgumentParser(description='Meta-learn a BERT initialization Model for NER',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # These three options have no meaningful default and main() forwards them unchecked to
    # MetaDataSet(), BertTokenizer.from_pretrained() and os.path.exists(). Marking them required
    # turns a forgotten flag into an argparse usage error instead of a failure on None further down
    # (os.path.exists(None) raises TypeError).
    parser.add_argument("--data", type=str, required=True,
                        help='Path to the data (concatenation of train, dev, test)')
    parser.add_argument('--bert_model_path', type=str, required=True, help='Name or path of the bert model')
    parser.add_argument('--alphabets_folder', type=str, required=True, help='Folder to store alphabets files')
    # Optional settings; main() replaces --log_file when SM_OUTPUT_DATA_DIR is set in the environment.
    parser.add_argument('--log_file', type=str, default=None, help='Path to the log file')
    # --verbose is passed to init_root_logger() as its `level`, hence the logging-constant default.
    parser.add_argument('--verbose', type=int, default=logging.INFO, help='Verbosity of logging')
    parser.add_argument('--max_token_length', type=int, default=40, help='Padding length')
    main(parser.parse_args())
