#
 #     MILIE: Modular & Iterative Multilingual Open Information Extraction
 #
 #
 #
 #     Authors: Deleted for purposes of anonymity
 #
 #     Proprietor: Deleted for purposes of anonymity --- PROPRIETARY INFORMATION
 #
 # The software and its source code contain valuable trade secrets and shall be maintained in
 # confidence and treated as confidential information. The software may only be used for
 # evaluation and/or testing purposes, unless otherwise explicitly stated in the terms of a
 # license agreement or nondisclosure agreement with the proprietor of the software.
 # Any unauthorized publication, transfer to third parties, or duplication of the object or
 # source code---either totally or in part---is strictly prohibited.
 #
 #     Copyright (c) 2021 Proprietor: Deleted for purposes of anonymity
 #     All Rights Reserved.
 #
 # THE PROPRIETOR DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR
 # IMPLIED, INCLUDING BUT NOT LIMITED TO IMPLIED WARRANTIES OF MERCHANTABILITY
 # AND FITNESS FOR A PARTICULAR PURPOSE AND THE WARRANTY AGAINST LATENT
 # DEFECTS, WITH RESPECT TO THE PROGRAM AND ANY ACCOMPANYING DOCUMENTATION.
 #
 # NO LIABILITY FOR CONSEQUENTIAL DAMAGES:
 # IN NO EVENT SHALL THE PROPRIETOR OR ANY OF ITS SUBSIDIARIES BE
 # LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES
 # FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF INFORMATION, OR
 # OTHER PECUNIARY LOSS AND INDIRECT, CONSEQUENTIAL, INCIDENTAL,
 # ECONOMIC OR PUNITIVE DAMAGES) ARISING OUT OF THE USE OF OR INABILITY
 # TO USE THIS PROGRAM, EVEN IF the proprietor HAS BEEN ADVISED OF
 # THE POSSIBILITY OF SUCH DAMAGES.
 #
 # For purposes of anonymity, the identity of the proprietor is not given herewith.
 # The identity of the proprietor will be given once the review of the
 # conference submission is completed.
 #
 # THIS HEADER MAY NOT BE EXTRACTED OR MODIFIED IN ANY WAY.
 #
import argparse
import logging
import os

from .transformer_heads import MODEL_CLASSES#, ALL_MODELS

# Module-level logger, named after this module via __name__.
LOGGER = logging.getLogger(__name__)


def get_arguments(parse=True):
    """
    Builds the milie argument parser and, optionally, parses the command line.

    :param parse: if exactly ``True`` (the default), the command line is parsed and the
                  resulting namespace is returned; for any other value the configured
                  parser itself is returned, unparsed.
    :return: the parsed milie command line arguments, or the argument parser
    """
    arg_parser = __add_arguments(argparse.ArgumentParser())
    if parse is not True:
        # Hand back the parser without touching the command line.
        return arg_parser
    return arg_parser.parse_args()


def __str_to_bool(value):
    """
    Converts a command line string into a boolean.

    ``type=bool`` must not be used with argparse for this purpose: ``bool("False")`` is
    ``True`` because every non-empty string is truthy.

    :param value: the raw command line value (an actual bool is passed through unchanged)
    :return: ``True`` or ``False``
    :raises argparse.ArgumentTypeError: if the value is not a recognised boolean spelling
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string maps to False, as it did with the former ``type=bool``.
    if normalized in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError("Expected a boolean value, got %r" % (value,))


def __add_arguments(parser):
    """
    Registers all milie command line options on the given parser.

    :param parser: the ``argparse.ArgumentParser`` to which the options are added
    :return parser: the same parser instance, with all options registered
    """
    # Required arguments
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where all relevant files will be "
                             "written to.")
    parser.add_argument("--data_set", type=str, required=True, nargs='+',
                        help="Which dataset(s) to expect.\n Options are: %s" %
                             ", ".join(['sharc', 'daily_dialog', 'bitext', 'cnndailymail',
                                        'persona', 'aq_squad', 'sentence', 'msmarco',
                                        'kyoto_gen', 'kyoto_lexicon', 'mmkg_fb15k', 'opiec']))
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="Which model type to use: " + ", ".join(MODEL_CLASSES.keys()))
    # NOTE: the list of shortcut names is currently not appended to this help text.
    parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
                        help="Path to pre-trained model or shortcut name selected in the list: "
                             + "\n Leave empty to initialize a new model")

    # Other parameters
    parser.add_argument("--train_file", default=None, type=str, nargs='+',
                        help="Input file for training")
    parser.add_argument("--predict_file", default=None, type=str, nargs='+',
                        help="Input file for prediction.")
    parser.add_argument("--valid_gold", default=None, type=str, nargs='+',
                        help="Location of gold file for evaluating predictions.")
    parser.add_argument('--valid_every_epoch',
                        action='store_true',
                        help="Whether to validate on the validation set after every "
                             "epoch, save best model according to the evaluation metric "
                             "indicated by each specific dataset class.")
    parser.add_argument("--load_prev_model", default=None, type=str,
                        help="Provide a file location if a previous model should be "
                             "loaded. (Note that Adam Optimizer parameters are lost.)")
    parser.add_argument("--adam_schedule", default="warmup_linear", const="warmup_linear",
                        nargs='?', type=str,
                        choices=['warmup_linear', 'warmup_constant', 'warmup_cosine'],
                        help="Warm up schedule to use for Adam.")
    parser.add_argument("--max_seq_length", default=384, type=int,
                        help="The maximum total sequence length (Part A + B) after "
                             "tokenization (incl. special tokens).")
    parser.add_argument("--max_part_a", default=64, type=int,
                        help="The maximum number of tokens for Part A. Sequences longer "
                             "than this will be truncated to this length.")
    parser.add_argument("--truncation_strategy", type=str, default='longest_first',
                        choices=['longest_first', 'only_first', 'only_second', 'do_not_truncate'],
                        help="Truncation strategy")
    parser.add_argument("--do_train", action='store_true',
                        help="Should be true to run training.")
    parser.add_argument("--do_predict", action='store_true',
                        help="Should be true to run prediction.")
    parser.add_argument("--train_batch_size", default=32, type=int,
                        help="Total batch size for training. "
                             "Actual batch size will be divided by "
                             "gradient_accumulation_steps and clipped to closest int.")
    parser.add_argument("--predict_batch_size", default=8, type=int,
                        help="Batch size to use for predictions.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs", default=3.0, type=float,
                        help="How many training epochs to run.")
    parser.add_argument("--warmup_proportion", default=0.1, type=float,
                        help="Proportion of training to perform linear learning rate "
                             "warmup e.g., 0.1 = 10 percent of training.")
    parser.add_argument("--verbose_logging", action='store_true',
                        help="Log more information.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Use CPU even if GPU is available.")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="Random seed for initialization, "
                             "set to -1 to draw a random number.")
    parser.add_argument('--gradient_accumulation_steps',
                        type=int,
                        default=1,
                        help="Number of updates steps to accumulate before performing "
                             "a backward/update pass.")
    parser.add_argument("--do_lower_case",
                        action='store_true',
                        help="Whether to lower case the input text. "
                             "Should be True for uncased models, False for cased models.")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--fp16',
                        action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument('--loss_scale',
                        type=float, default=0,
                        help="Loss scaling to improve fp16 numeric stability. "
                             "Only used when fp16 set to True.\n"
                             "0 (default value): dynamic loss scaling.\n"
                             "Positive power of 2: static loss scaling value.\n")
    parser.add_argument('--max_grad_norm', type=float, default=1.0,
                        help="Set the value above which gradients will be clipped.")
    parser.add_argument("--config_name", default="", type=str,
                        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument("--tokenizer_name", default="", type=str,
                        help="Pretrained tokenizer name or path if not the same as model_name")
    parser.add_argument("--sentencepiece_model_file", default="",
                        type=str, help="Path to sentencepiece model file")

    # Boolean switches. They previously used ``type=bool``, which turned every non-empty
    # value (including "False") into True and rejected the bare flag. Now they accept
    # either the bare flag ("--mask_in_a", stored as True) or an explicit value
    # ("--mask_in_a False"). When the option is not supplied at all the value stays None.
    bool_switch = {"type": __str_to_bool, "nargs": '?', "const": True}

    # Pertaining BERT
    parser.add_argument("--vocab_size", type=int, default=30522,
                        help="The desired vocabulary size, only use in conjunction with "
                             "bert-vanilla.")
    parser.add_argument("--no_basic_tok",
                        help="If supplied, basic tokenization will not be performed "
                             "(e.g. punctuation).",
                        **bool_switch)
    parser.add_argument("--output_hidden_states",
                        help="If supplied, hidden states will be returned.",
                        **bool_switch)
    parser.add_argument("--output_attentions",
                        help="If supplied, attention will be returned.",
                        **bool_switch)

    # Pertaining masking
    parser.add_argument("--mask_in_a",
                        help="If supplied, masking (as set by masking_strategy, "
                             "distribution_mean and distribution_stdev) is applied to part a.",
                        **bool_switch)
    parser.add_argument("--mask_in_b",
                        help="If supplied, masking (as set by masking_strategy, "
                             "distribution_mean and distribution_stdev) is applied to part b.",
                        **bool_switch)
    parser.add_argument("--max_gen_a_length", default=0, type=int,
                        help="Maximum length for output generation sequence (Part A).")
    parser.add_argument("--max_gen_b_length", default=0, type=int,
                        help="Maximum length for output generation sequence (Part B).")
    parser.add_argument("--masking_strategy", default=None, type=str, const=None,
                        nargs='?',
                        choices=['bernoulli', 'gaussian', 'dataset_dependent'],
                        help="Which masking strategy to use, options are: "
                             "bernoulli, gaussian, dataset_dependent")
    parser.add_argument("--distribution_mean", default=1.0, type=float,
                        help="The mean (for Bernoulli and Gaussian sampling).")
    parser.add_argument("--distribution_stdev", default=0.0, type=float,
                        help="The standard deviation (for Gaussian sampling).")

    # Trailing spaces inside the literals matter: adjacent string literals are
    # concatenated without any separator.
    parser.add_argument("--plus_classify_tokens",
                        type=int, default=0,
                        help="How many token classification heads should be used, "
                             "number of labels should be set as a list in "
                             "data_handler.num_labels_tok and the mapping in "
                             "data_handler.text2id_tok and data_handler.id2text_tok.")

    # add option for caching the processed training set
    parser.add_argument("--cache_train",
                        action="store_true",
                        help=" Use this option if you wish to save the"
                             " processed training file (with ids) and reload it to save processing time.")

    return parser
