# Copyright (c) Facebook, Inc. and its affiliates.

# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import copy
import json
import itertools
import random
from typing import (
    List,
    Union,
    Optional,
)

import torch
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader
import wandb
from jsonargparse import ArgumentParser
from torch.utils.data import (
    Subset,
    ConcatDataset,
)
import tensorflow as tf

import egg.core as core
from egg.core import EarlyStopperAccuracy
from egg.core.callbacks import (
    WandbLogger,
    Callback,
)
from egg.zoo.compo_vs_generalization.archs import (
    Freezer,
    NonLinearReceiver,
    PlusOneWrapper,
    Receiver,
    Sender,
    MultiRnnReceiverDeterministic,
    SenderExpansionWrapper,
    MultiRnnReceiverReinforce,
    DiscriminationReceiver,
    MultiDiscriminationRnnReceiverReinforce,
    MultiDiscriminationRnnReceiverDeterministic,
    ReceiverLogProbWrapper,
    DiscriminationReceiverLogProbWrapper,
)
from egg.zoo.compo_vs_generalization.data import (
    ScaledDataset,
    enumerate_attribute_value,
    one_hotify,
    select_subset_V1,
    select_subset_V2,
    split_holdout,
    split_train_test,
    split_train_val_test,
    ImageDiscrimiationDataset,
    ImageDiscriminationDatasetLogitWrapper,
)
from egg.zoo.compo_vs_generalization.intervention import (
    Evaluator, 
    Metrics,
    ValBestCheckpoint,
    RandomResetter,
    UniformResetter,
    DetailedWandbLogger,
    SimultaneousResetter,
    EpochLogEvaluator,
)
from egg.zoo.compo_vs_generalization.losses import (
    DiscriminationLoss,
    MultiDiscriminationLoss
)


# Fixed seed that main() selects instead of --random_seed (when
# --use_global_seed is set) for the OOD attribute split and for the
# deterministic evaluation datasets.
GLOBAL_SEED = 7


def get_params(params):
    """Declare the script-specific options and parse ``params``.

    Args:
        params: Command-line arguments without the program name
            (``sys.argv[1:]`` when run as a script).

    Returns:
        The options object produced by ``core.init``. ``main`` also reads
        options that are not declared here (e.g. ``lr``, ``batch_size``,
        ``vocab_size``, ``max_len``, ``random_seed``, ``n_epochs``); those are
        expected to be contributed by ``core.init``.
    """
    parser = ArgumentParser()

    # --- data / evaluation schedule ---
    parser.add_argument("--n_attributes", type=int, default=4, help="")
    parser.add_argument("--n_values", type=int, default=4, help="")
    parser.add_argument("--data_scaler", type=int, default=100)
    parser.add_argument("--stats_freq", type=int, default=0)
    parser.add_argument("--val_eval_freq", type=int, default=1)
    parser.add_argument("--test_eval_freq", type=int, default=0)
    parser.add_argument(
        "--baseline", type=str, choices=["no", "mean", "builtin"], default="mean"
    )
    parser.add_argument(
        "--density_data", type=int, default=0, help="no sampling if equal 0"
    )

    # --- agent architecture ---
    parser.add_argument(
        "--sender_hidden",
        type=int,
        default=50,
        help="Size of the hidden layer of Sender (default: 50)",
    )
    parser.add_argument(
        "--receiver_hidden",
        type=int,
        default=50,
        help="Size of the hidden layer of Receiver (default: 50)",
    )
    parser.add_argument(
        "--sender_entropy_coeff",
        type=float,
        default=1e-2,
        help="Entropy regularisation coeff for Sender (default: 1e-2)",
    )
    parser.add_argument("--sender_cell", type=str, default="rnn")
    parser.add_argument("--receiver_cell", type=str, default="rnn")
    parser.add_argument(
        "--sender_emb",
        type=int,
        default=10,
        help="Size of the embeddings of Sender (default: 10)",
    )
    parser.add_argument(
        "--receiver_emb",
        type=int,
        default=10,
        help="Size of the embeddings of Receiver (default: 10)",
    )
    parser.add_argument(
        "--early_stopping_thr",
        type=float,
        default=0.999,
        help="Early stopping threshold on accuracy (default: 0.999)",
    )
    parser.add_argument("--load_val_best", type=bool, default=True)

    # --- wandb logging ---
    parser.add_argument("--wandb_project", type=str, default='div_int')
    parser.add_argument("--wandb_name", type=str, default='div_int')
    # The default must match the declared List[str] type; a bare string
    # would be a type mismatch (and iterates character by character).
    parser.add_argument("--wandb_tags", type=List[str], default=['div_int'])

    # --- experiment layout ---
    # String that main() evaluates into a list of (n_att, n_comb, n_dup)
    # tuples; the literal 'None' selects main()'s fallback.
    parser.add_argument("--n_att_n_comb_n_dup", type=str, default='None')
    parser.add_argument("--validation_ratio", type=float, default=None)
    parser.add_argument("--test_ratio", type=float, default=None)
    parser.add_argument("--ckpt_path", type=str, default='checkpoints')
    parser.add_argument("--loss_type", type=str, default='cross_entropy')
    parser.add_argument("--variable_len", type=bool, default=False)
    parser.add_argument("--group_size", type=int, default=1)

    # --- periodic agent resetting ---
    parser.add_argument("--sender_reset_period", type=int, default=0)
    parser.add_argument("--receiver_reset_period", type=int, default=0)
    parser.add_argument("--reset_type", type=str, default='uniform')
    parser.add_argument("--reset_on_epoch", type=bool, default=True)

    # --- second ("eol") training stage ---
    parser.add_argument("--eol_n_epochs", type=int, default=100)
    parser.add_argument("--eol_n_attributes", type=List[int], default=[])

    # --- loss / metrics ---
    parser.add_argument("--topsim_max_samples", type=int, default=None)
    parser.add_argument("--cross_entropy_weight", type=float, default=1.0)

    # --- dataset and data loading (no defaults: must be supplied) ---
    parser.add_argument("--dataset_dir", type=str)
    parser.add_argument("--train_n_samples", type=Optional[int])
    parser.add_argument("--eval_n_samples", type=Optional[int])
    parser.add_argument("--add_layer_norm", type=bool, default=False)
    parser.add_argument("--n_context", type=int)
    parser.add_argument("--n_workers", type=int)
    parser.add_argument("--val_batch_size", type=int)
    parser.add_argument("--test_batch_size", type=int)
    parser.add_argument("--train_as", type=str, default=None)
    parser.add_argument("--ce_group_mean", type=str, default=None)
    parser.add_argument("--ood_test_ratio", type=Optional[float], default=None)

    # --- message handling / metric options ---
    parser.add_argument("--preserve_eos", type=bool, default=False)
    parser.add_argument("--metric_exclude_eos", type=bool, default=False)
    parser.add_argument("--metric_max_samples", type=Optional[int], default=None)
    parser.add_argument(
        "--abstractness_max_samples", type=Optional[int], default=None
    )
    parser.add_argument("--use_global_seed", type=bool, default=False)

    return core.init(arg_parser=parser, params=params)


def _set_seed(seed):
    import random
    import numpy as np

    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def _has_validation_split(opts):
    """Return True when a positive ``--validation_ratio`` was supplied."""
    return opts.validation_ratio is not None and opts.validation_ratio > 0.0


def _build_datasets(opts, att_indices, seed, train_att_idx, test_att_idx):
    """Create the train, validation and test datasets (in that order).

    ``train_att_idx`` / ``test_att_idx`` are passed as ``att_filter``; ``None``
    means no filter is given. Without a validation split the validation
    dataset is built from the ``'train'`` split.
    """
    n_distractors = opts.n_context - 1
    train = ImageDiscrimiationDataset(
        dataset_dir=opts.dataset_dir,
        split='train' if opts.train_as is None else opts.train_as,
        att_indices=att_indices,
        n_distractors=n_distractors,
        n_samples_per_epoch=opts.train_n_samples,
        deterministic=False,
        batch_size=opts.batch_size,
        scale=opts.data_scaler,
        load_at_init=True,
        att_filter=train_att_idx,
    )
    validation = ImageDiscrimiationDataset(
        dataset_dir=opts.dataset_dir,
        split='valid' if _has_validation_split(opts) else 'train',
        att_indices=att_indices,
        n_distractors=n_distractors,
        n_samples_per_epoch=opts.eval_n_samples,
        deterministic=True,
        seed=seed,
        batch_size=opts.val_batch_size,
        load_at_init=True,
        att_filter=train_att_idx,
    )
    test = ImageDiscrimiationDataset(
        dataset_dir=opts.dataset_dir,
        split='test',
        att_indices=att_indices,
        n_distractors=n_distractors,
        n_samples_per_epoch=opts.eval_n_samples,
        deterministic=True,
        seed=seed,
        batch_size=opts.test_batch_size,
        load_at_init=True,
        att_filter=test_att_idx,
    )
    return train, validation, test


def _build_loaders(opts, train, validation, test):
    """Wrap the three datasets in DataLoaders; only the train one shuffles.

    The loaders use ``batch_size=1`` because the batch size is handed to the
    datasets themselves (presumably each dataset item is already a full
    batch -- confirm against ImageDiscrimiationDataset).
    """
    return [
        DataLoader(
            dataset=dataset,
            shuffle=shuffle,
            num_workers=opts.n_workers,
            batch_size=1,
            collate_fn=ImageDiscrimiationDataset.collate_fn,
        )
        for dataset, shuffle in zip(
            [train, validation, test],
            [True, False, False],
        )
    ]


def _build_receiver(opts, visual_dim):
    """Create one deterministic RNN receiver around a DiscriminationReceiver.

    Raises:
        ValueError: if ``opts.receiver_cell`` is not "lstm", "rnn" or "gru".
    """
    if opts.receiver_cell not in ["lstm", "rnn", "gru"]:
        raise ValueError(f"Unknown receiver cell, {opts.receiver_cell}")
    receiver = DiscriminationReceiver(
        out_dim=opts.receiver_hidden,
        message_in_dim=opts.receiver_hidden,
        visual_in_dim=visual_dim,
    )
    return core.RnnReceiverDeterministic(
        receiver,
        opts.vocab_size if opts.variable_len else opts.vocab_size + 1,
        opts.receiver_emb,
        opts.receiver_hidden,
        cell=opts.receiver_cell,
    )


def _wrap_receivers(opts, receivers):
    """Combine ``receivers`` into the multi-receiver matching the loss type.

    Raises:
        ValueError: if ``opts.loss_type`` is not a supported loss type.
    """
    if opts.loss_type == 'cross_entropy':
        return MultiDiscriminationRnnReceiverDeterministic(receivers)
    if opts.loss_type in ['task_success', 'mixed', 'mixed_wo_ce']:
        return MultiDiscriminationRnnReceiverReinforce(
            receivers,
        )
    raise ValueError(f"Unknown loss type, {opts.loss_type}")


def _build_loss(opts, att_indices, group_size, **extra):
    """Create a MultiDiscriminationLoss; ``extra`` is forwarded unchanged."""
    return MultiDiscriminationLoss(
        att_indices=att_indices,
        loss_type=opts.loss_type,
        group_size=group_size,
        cross_entropy_weight=opts.cross_entropy_weight,
        **extra,
    )


def _build_eval_loaders(
    opts, att_indices, test_loader, validation_loader, group_size, **extra
):
    """Return ``(loaders, val_test_loaders)`` as (name, loader, loss) triples.

    ``loaders`` holds the test loader only. ``val_test_loaders`` additionally
    holds the validation loader when a validation split exists; otherwise it
    is a copy of ``loaders`` so that it is always defined.
    """
    loaders = [
        ("test", test_loader, _build_loss(opts, att_indices, group_size, **extra)),
    ]
    val_test_loaders = list(loaders)
    if _has_validation_split(opts):
        val_test_loaders.append(
            (
                "val",
                validation_loader,
                _build_loss(opts, att_indices, group_size, **extra),
            )
        )
    return loaders, val_test_loaders


def _select_eol_att_indices(opts, prev_att_indices, n_att):
    """Pick the single attribute group used by the second stage for ``n_att``.

    The last first-stage group of size ``n_att`` is reused when one exists;
    otherwise the first chunk of a seeded shuffle of all attributes is used.
    """
    matching = [idx for idx in prev_att_indices if len(idx) == n_att]
    if matching:
        return [matching[-1]]
    rng = np.random.default_rng(opts.random_seed)
    combs = list(range(opts.n_attributes))
    rng.shuffle(combs)
    combs = torch.tensor(combs).split(n_att)
    assert len(combs[0]) == n_att, "n_att exceeds the number of attributes"
    return [combs[0].tolist()]


def main(params):
    """Run the full experiment: communication game, then receiver probing.

    Stage 1 trains a sender together with one receiver per sampled attribute
    group on the image discrimination task, with wandb logging, optional
    best-on-validation checkpointing, periodic resetting and metric
    callbacks. Stage 2 freezes a copy of the trained sender and, for each
    attribute-group size, trains a fresh receiver against it, logging under
    the ``eol_<n_att>/`` prefix.

    Args:
        params: Command-line arguments without the program name.

    Raises:
        ValueError: on an unknown receiver cell, sender cell, loss type or
            reset type.
    """
    opts = get_params(params)
    assert not opts.variable_len, "variable-length messages are not supported"
    # Must be a plain int: a trailing comma here would turn it into a 1-tuple
    # that is then handed to every dataset and metric as its seed.
    seed = GLOBAL_SEED if opts.use_global_seed else opts.random_seed
    has_validation = _has_validation_split(opts)

    _set_seed(opts.random_seed)

    # Prevent TF from using GPU memory.
    tf.config.set_visible_devices([], 'GPU')

    opts.n_attributes = ImageDiscrimiationDataset.n_attributes(opts.dataset_dir)
    # NOTE(security): eval() executes arbitrary Python taken from the command
    # line. ast.literal_eval would be safer if only literals are ever passed.
    opts.n_att_n_comb_n_dup = eval(opts.n_att_n_comb_n_dup)
    if opts.n_att_n_comb_n_dup is None:
        opts.n_att_n_comb_n_dup = [(opts.n_attributes, 1, 1)]

    # Attribute filters stay None (no filtering) unless an OOD split is asked
    # for; they are read unconditionally further down.
    train_att_idx = test_att_idx = None
    if opts.ood_test_ratio is not None:
        rng = np.random.default_rng(seed)
        combs = list(range(opts.n_attributes))
        rng.shuffle(combs)
        n_test = int(len(combs) * opts.ood_test_ratio)
        test_att_idx = combs[:n_test]
        train_att_idx = combs[n_test:]

    # Sample the attribute groups: for each (n_att, n_comb, n_dup) spec,
    # shuffle the attributes, cut them into chunks of n_att, draw n_comb
    # chunks without replacement and repeat the draw n_dup times.
    att_indices = []
    rng = np.random.default_rng(opts.random_seed)
    for n_att, n_comb, n_dup in opts.n_att_n_comb_n_dup:
        combs = list(range(opts.n_attributes))
        rng.shuffle(combs)
        combs = torch.tensor(combs).split(n_att)
        for comb in combs:
            assert len(comb) == n_att, "n_attributes must be divisible by n_att"
        att_idx = rng.choice(len(combs), replace=False, size=n_comb)
        att_idx = np.tile(att_idx, n_dup)
        att_indices += [combs[i].tolist() for i in att_idx]

    train, validation, test = _build_datasets(
        opts, att_indices, seed, train_att_idx, test_att_idx
    )
    train_loader, validation_loader, test_loader = _build_loaders(
        opts, train, validation, test
    )

    # One receiver per sampled attribute group (n_comb * n_dup per spec).
    receivers = []
    for _n_att, n_comb, n_dup in opts.n_att_n_comb_n_dup:
        for _ in range(n_comb):
            for _ in range(n_dup):
                receivers.append(_build_receiver(opts, test.visual_dim))
    receiver = _wrap_receivers(opts, receivers)

    if opts.sender_cell not in ["lstm", "rnn", "gru"]:
        raise ValueError(f"Unknown sender cell, {opts.sender_cell}")
    sender = Sender(n_inputs=test.visual_dim, n_hidden=opts.sender_hidden)
    sender = core.RnnSenderReinforce(
        agent=sender,
        vocab_size=opts.vocab_size,
        embed_dim=opts.sender_emb,
        hidden_size=opts.sender_hidden,
        max_len=opts.max_len,
        cell=opts.sender_cell,
        layer_norm=opts.add_layer_norm,
    )

    if not opts.variable_len:
        sender = PlusOneWrapper(sender, preserve_eos=opts.preserve_eos)
    loss = _build_loss(
        opts, att_indices, opts.group_size, ce_group_mean=opts.ce_group_mean
    )

    baseline = {
        "no": core.baselines.NoBaseline,
        "mean": core.baselines.MeanBaseline,
        "builtin": core.baselines.BuiltInBaseline,
    }[opts.baseline]

    game = core.SenderReceiverRnnReinforce(
        sender,
        receiver,
        loss,
        sender_entropy_coeff=opts.sender_entropy_coeff,
        receiver_entropy_coeff=0.0,
        length_cost=0.0,
        baseline_type=baseline,
    )
    optimizer = torch.optim.Adam(game.parameters(), lr=opts.lr)

    # The metric callbacks use the receiver of the largest attribute group.
    max_idx = np.argmax([len(att) for att in att_indices])
    metric_splits = [('test', test_att_idx)]
    if has_validation:
        metric_splits.append(('valid', train_att_idx))

    metrics_evaluators = []
    for split, ood_idx in metric_splits:
        # Without an OOD split there is no filter to enumerate, so the
        # abstractness dataset falls back to every attribute.
        # NOTE(review): confirm this fallback matches the intended metric.
        abstract_atts = (
            ood_idx if ood_idx is not None else range(opts.n_attributes)
        )
        metrics_evaluators.append(
            Metrics(
                ImageDiscriminationDatasetLogitWrapper(
                    ImageDiscrimiationDataset(
                        dataset_dir=opts.dataset_dir,
                        split=split,
                        att_indices=[
                            list(range(
                                ImageDiscrimiationDataset.n_attributes(
                                    opts.dataset_dir
                                )
                            ))
                        ],  # Not used.
                        n_distractors=1,
                        deterministic=True,
                        seed=seed,
                        n_samples_per_epoch=None,
                        batch_size=1,
                        att_filter=ood_idx,
                    )
                ),
                opts.device,
                None,
                None,
                opts.vocab_size if opts.variable_len else opts.vocab_size + 1,
                freq=opts.stats_freq,
                name=split,
                max_len=opts.max_len,  # Not counting EOS.
                topsim_max_samples=opts.topsim_max_samples,
                seed=seed,
                batch_size=opts.batch_size,
                exclude_eos=opts.metric_exclude_eos,
                receiver=DiscriminationReceiverLogProbWrapper(
                    receivers[max_idx],
                ),
                receiver_dataset=ImageDiscrimiationDataset(
                    dataset_dir=opts.dataset_dir,
                    split=split,
                    att_indices=[att_indices[max_idx]],
                    n_distractors=opts.n_context - 1,
                    deterministic=True,
                    seed=seed,
                    n_samples_per_epoch=None,
                    batch_size=1,
                    att_filter=ood_idx,
                ),
                receiver_batch_size=1,
                receiver_collate_fn=ImageDiscrimiationDataset.collate_fn,
                max_samples=opts.metric_max_samples,
                abstractness_dataset=ImageDiscrimiationDataset(
                    dataset_dir=opts.dataset_dir,
                    split=split,
                    att_indices=[[idx] for idx in abstract_atts],
                    n_distractors=2,
                    deterministic=True,
                    seed=seed,
                    n_samples_per_epoch=None,
                    batch_size=1,
                    att_filter=ood_idx,
                    skip_short_atts=True,
                ),
                abstractness_max_samples=opts.abstractness_max_samples,
            )
        )

    loaders, val_test_loaders = _build_eval_loaders(
        opts,
        att_indices,
        test_loader,
        validation_loader,
        opts.group_size,
        ce_group_mean=opts.ce_group_mean,
    )

    epoch_evaluator = EpochLogEvaluator(
        val_test_loaders,
        opts.device,
    )
    evaluator = Evaluator(
        loaders,
        opts.device,
        freq=opts.test_eval_freq
    )

    # Created before the checkpoint callback, which reads wandb.run.
    wandb_logger = DetailedWandbLogger(
        opts=opts,
        project=opts.wandb_project,
        name=opts.wandb_name,
        tags=opts.wandb_tags,
    )

    if opts.load_val_best:
        val_best_saver = ValBestCheckpoint(
            checkpoint_path=f'{opts.ckpt_path}/{wandb.run.project}/{wandb.run.id}',
            checkpoint_freq=opts.val_eval_freq,
            prefix='',
        )
    else:
        # Plain Callback instance as a placeholder.
        val_best_saver = Callback()

    resetter_classes = {
        'random': RandomResetter,
        'uniform': UniformResetter,
        'simultaneous': SimultaneousResetter,
    }
    if opts.reset_type not in resetter_classes:
        raise ValueError(f"Unknown reset type, {opts.reset_type}")
    resetter = resetter_classes[opts.reset_type](
        receiver_period=opts.receiver_reset_period,
        sender_period=opts.sender_reset_period,
        on_epoch=opts.reset_on_epoch,
    )

    trainer = core.Trainer(
        game=game,
        optimizer=optimizer,
        train_data=train_loader,
        validation_data=validation_loader,
        callbacks=[
            val_best_saver,  # This needs to come at first.
            core.ConsoleLogger(as_json=True, print_train_loss=False),
            wandb_logger,
            resetter,
            epoch_evaluator,
            evaluator,
            *metrics_evaluators,
        ],
    )
    trainer.train(n_epochs=opts.n_epochs)

    core.get_opts().preemptable = False
    core.get_opts().checkpoint_path = None

    # freeze Sender and probe how fast a simple Receiver will learn the thing
    frozen_sender = Freezer(copy.deepcopy(sender))

    # Distinct group sizes seen in stage 1, plus any explicitly requested.
    prev_att_indices = att_indices
    prev_n_atts = []
    for prev_att_idx in prev_att_indices:
        if len(prev_att_idx) not in prev_n_atts:
            prev_n_atts.append(len(prev_att_idx))

    n_atts = list(set(prev_n_atts) | set(opts.eol_n_attributes))

    for n_att in n_atts:
        # Drop this scope's references to the previous training data before
        # the replacements are built.
        del train_loader, train

        # Give every group size its own wandb step axis and metric prefix.
        wandb.define_metric(f'eol_{n_att}_step')
        wandb.define_metric(f'eol_{n_att}/*', f'eol_{n_att}_step')
        wandb_logger.global_prefix = f'eol_{n_att}/'
        wandb_logger.step = 0
        _set_seed(opts.random_seed)

        att_indices = _select_eol_att_indices(opts, prev_att_indices, n_att)

        receiver = _wrap_receivers(
            opts, [_build_receiver(opts, test.visual_dim)]
        )

        loss = _build_loss(opts, att_indices, 1)

        game = core.SenderReceiverRnnReinforce(
            frozen_sender,
            receiver,
            loss,
            sender_entropy_coeff=0.0,
            receiver_entropy_coeff=0.0,
        )
        # Only the receiver's parameters are handed to the optimizer.
        optimizer = torch.optim.Adam(receiver.parameters(), lr=opts.lr)
        early_stopper = EarlyStopperAccuracy(
            opts.early_stopping_thr, validation=False
        )

        train, validation, test = _build_datasets(
            opts, att_indices, seed, train_att_idx, test_att_idx
        )
        train_loader, validation_loader, test_loader = _build_loaders(
            opts, train, validation, test
        )

        loaders, val_test_loaders = _build_eval_loaders(
            opts, att_indices, test_loader, validation_loader, 1
        )

        epoch_evaluator = EpochLogEvaluator(
            val_test_loaders,
            opts.device,
            global_prefix=f'eol_{n_att}/'
        )
        evaluator = Evaluator(
            loaders,
            opts.device,
            freq=opts.test_eval_freq,
            global_prefix=f'eol_{n_att}/'
        )

        trainer = core.Trainer(
            game=game,
            optimizer=optimizer,
            train_data=train_loader,
            validation_data=validation_loader,
            callbacks=[
                wandb_logger,
                early_stopper,
                epoch_evaluator,
                evaluator,
            ],
        )
        trainer.train(n_epochs=opts.eol_n_epochs)

    print("---End--")

    core.close()


if __name__ == "__main__":
    # Script entry point: hand every command-line argument after the
    # program name to main().
    from sys import argv

    main(argv[1:])
