#!/usr/bin/env python3 -u
# Copyright 2022 The OFA-Sys Team.
# All rights reserved.
# This source code is licensed under the Apache 2.0 license
# found in the LICENSE file in the root directory.
import string
import logging
import os
import sys
from typing import List, Dict, Optional
import json
import numpy as np
import torch
from fairseq import distributed_utils, options, tasks, utils
from fairseq.dataclass.utils import convert_namespace_to_omegaconf
from fairseq.logging import progress_bar
from fairseq.utils import reset_logging
from omegaconf import DictConfig
from torch.nn.functional import normalize
from utils import checkpoint_utils
from utils.eval_utils import eval_step, merge_results
from utils.zero_shot_utils import zero_shot_step

# Root logging setup: records go to stdout, and the verbosity is taken from
# the LOGLEVEL environment variable (falling back to INFO when it is unset).
logging.basicConfig(
    stream=sys.stdout,
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
)
# Logger used by the functions in this script.
logger = logging.getLogger("ofa.evaluate")
def apply_half(t):
    """Return ``t`` cast to ``torch.half`` if it is float32, else ``t`` itself.

    Tensors of any other dtype are returned unchanged (the same object).
    """
    return t.to(dtype=torch.half) if t.dtype is torch.float32 else t

# Number of leading encoder positions kept for the image spectrum.
# NOTE(review): presumably the image-patch positions of the encoder output
# -- confirm against the model configuration.
_NUM_IMAGE_POSITIONS = 900
# Number of trailing decoder positions dropped before the text spectrum.
# NOTE(review): the reason for dropping exactly 8 positions is not visible
# in this file -- confirm.
_NUM_TRAILING_TEXT_POSITIONS = 8


def _resolve_output_dir(cfg, kwargs):
    """Return the directory the SVD JSON files are written to.

    An explicit ``svd_save_path`` keyword argument takes precedence;
    otherwise ``cfg.common_eval.results_path`` (``--results-path``) is used.

    Raises:
        ValueError: if neither source provides a non-empty path.
    """
    output_dir = kwargs.get("svd_save_path") or cfg.common_eval.results_path
    if not output_dir:
        raise ValueError(
            "No output directory for the SVD results: pass --results-path "
            "(or the svd_save_path keyword argument)"
        )
    return output_dir


def _normalized_singular_values(features):
    """Return the L1-normalised singular values of every matrix in a batch.

    ``features`` is iterated along its first dimension; each slice is
    decomposed with ``torch.svd`` and its singular values are scaled so that
    they sum to one. The result is a list (one entry per slice) of lists of
    Python floats, ready for JSON serialisation.
    """
    return [
        normalize(torch.svd(matrix).S, p=1.0, dim=-1, eps=1e-12).tolist()
        for matrix in features
    ]


def main(cfg: DictConfig, **kwargs):
    """Dump normalised singular-value spectra of encoder and decoder features.

    Loads the checkpoint(s) named by ``cfg.common_eval.path``, iterates once
    over ``cfg.dataset.gen_subset`` and, for every sample in every batch,
    computes the L1-normalised singular values of

    * the encoder output restricted to its first ``_NUM_IMAGE_POSITIONS``
      positions, and
    * the decoder features with the last ``_NUM_TRAILING_TEXT_POSITIONS``
      positions removed.

    The spectra are written to ``svd_image_out_cgft.json`` and
    ``svd_text_out_cgft.json`` inside the output directory. If no batch
    contains a ``net_input`` entry, empty lists are written.

    Args:
        cfg: Evaluation configuration.
        **kwargs: Optional flags. ``zero_shot`` (build the task from
            ``cfg.task`` instead of from the checkpoint), ``ema_eval`` (load
            EMA weights into each model) and ``svd_save_path`` (output
            directory, overriding ``cfg.common_eval.results_path``) are
            read; any other keyword is accepted and ignored.

    Raises:
        ValueError: if no output directory is configured.
    """
    import ast  # function-scope import: only needed to parse the overrides

    utils.import_user_module(cfg.common)

    reset_logging()
    logger.info(cfg)

    assert (
        cfg.dataset.max_tokens is not None or cfg.dataset.batch_size is not None
    ), "Must specify batch size either with --max-tokens or --batch-size"

    # Fail before any model is loaded if there is nowhere to write results.
    output_dir = _resolve_output_dir(cfg, kwargs)

    # Fix seed for stochastic decoding
    if cfg.common.seed is not None and not cfg.generation.no_seed_provided:
        np.random.seed(cfg.common.seed)
        utils.set_torch_seed(cfg.common.seed)

    use_fp16 = cfg.common.fp16
    use_cuda = torch.cuda.is_available() and not cfg.common.cpu

    if use_cuda:
        torch.cuda.set_device(cfg.distributed_training.device_id)

    # SECURITY: the overrides string comes from the command line. It is parsed
    # as a Python literal (this replaces a former eval() call) so it cannot
    # execute arbitrary code.
    overrides = ast.literal_eval(cfg.common_eval.model_overrides)
    checkpoint_paths = utils.split_paths(cfg.common_eval.path)

    # Load ensemble
    logger.info("loading model(s) from {}".format(cfg.common_eval.path))
    if kwargs.get("zero_shot"):
        task = tasks.setup_task(cfg.task)
        models, saved_cfg = checkpoint_utils.load_model_ensemble(
            checkpoint_paths,
            arg_overrides=overrides,
            task=task,
            suffix=cfg.checkpoint.checkpoint_suffix,
            strict=(cfg.checkpoint.checkpoint_shard_count == 1),
            num_shards=cfg.checkpoint.checkpoint_shard_count,
        )
    else:
        models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
            checkpoint_paths,
            arg_overrides=overrides,
            suffix=cfg.checkpoint.checkpoint_suffix,
            strict=(cfg.checkpoint.checkpoint_shard_count == 1),
            num_shards=cfg.checkpoint.checkpoint_shard_count,
        )

    # loading the dataset should happen after the checkpoint has been loaded
    # so we can give it the saved task config
    task.load_dataset(cfg.dataset.gen_subset, task_cfg=saved_cfg.task)

    # NOTE(review): the language model loaded here is not used by the rest of
    # this function; the load is kept so a bad --lm-path still fails loudly.
    if cfg.generation.lm_path is not None:
        overrides["data"] = cfg.task.data

        try:
            lms, _ = checkpoint_utils.load_model_ensemble(
                [cfg.generation.lm_path], arg_overrides=overrides, task=None
            )
        except Exception:
            logger.warning(
                f"Failed to load language model! Please make sure that the language model dict is the same "
                f"as target dict and is located in the data dir ({cfg.task.data})"
            )
            raise

        assert len(lms) == 1

    # Put every model in inference mode and move it to the GPU if requested.
    for model, ckpt_path in zip(models, checkpoint_paths):
        if kwargs.get("ema_eval"):
            logger.info("loading EMA weights from {}".format(ckpt_path))
            model.load_state_dict(
                checkpoint_utils.load_ema_from_checkpoint(ckpt_path)['model'])
        model.eval()
        if use_fp16:
            model.half()
        if use_cuda and not cfg.distributed_training.pipeline_model_parallel:
            model.cuda()
        model.prepare_for_inference_(cfg)

    # Only one network is used for feature extraction: the last model of the
    # ensemble.
    model = models[-1]

    # Load dataset as a single shard; with num_shards=1 the only valid shard
    # index is 0.
    itr = task.get_batch_iterator(
        dataset=task.dataset(cfg.dataset.gen_subset),
        max_tokens=cfg.dataset.max_tokens,
        max_sentences=cfg.dataset.batch_size,
        max_positions=utils.resolve_max_positions(
            task.max_positions(), *[m.max_positions() for m in models]
        ),
        ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=1,
        seed=cfg.common.seed,
        num_shards=1,
        shard_id=0,
        num_workers=cfg.dataset.num_workers,
        data_buffer_size=cfg.dataset.data_buffer_size,
    ).next_epoch_itr(shuffle=False)
    progress = progress_bar.progress_bar(
        itr,
        log_format=cfg.common.log_format,
        log_interval=cfg.common.log_interval,
        default_log_format=(
            "tqdm" if not cfg.common.no_progress_bar else "simple"),
    )

    img_all_singular_values = []
    txt_all_singular_values = []
    for sample in progress:
        if "net_input" not in sample:
            continue
        # The whole sample is moved to the GPU here when CUDA is in use, so
        # the individual tensors need no further device transfer below (and
        # CPU-only runs keep working).
        if use_cuda:
            sample = utils.move_to_cuda(sample)
        if use_fp16:
            sample = utils.apply_to_sample(apply_half, sample)
        net_input = sample["net_input"]

        with torch.no_grad():
            encoder_output = model.encoder(
                net_input["src_tokens"],
                src_lengths=net_input["src_lengths"],
                patch_images=net_input["patch_images"],
                patch_masks=net_input["patch_masks"],
            )
            decoder_feature = model.decoder.extract_features(
                code_masks=None,
                encoder_out=encoder_output,
                prev_output_tokens=net_input["prev_output_tokens"],
            )

            # Swap the first two axes so that iterating over dim 0 yields one
            # matrix per sample.
            # NOTE(review): assumes the encoder output is laid out as
            # (positions, batch, dim) -- confirm.
            image_feature = encoder_output["encoder_out"][0].permute(1, 0, 2).cpu()
            # SVD is run in float32 regardless of the model precision.
            image_feature = image_feature[:, :_NUM_IMAGE_POSITIONS, :].float()
            text_feature = decoder_feature[0]
            text_feature = text_feature[:, :-_NUM_TRAILING_TEXT_POSITIONS, :].float()

            img_all_singular_values.extend(
                _normalized_singular_values(image_feature))
            txt_all_singular_values.extend(
                _normalized_singular_values(text_feature))

    os.makedirs(output_dir, exist_ok=True)
    img_l12_file = os.path.join(output_dir, 'svd_image_out_cgft.json')
    txt_l12_file = os.path.join(output_dir, 'svd_text_out_cgft.json')

    with open(img_l12_file, 'w') as img_file:
        json.dump(img_all_singular_values, img_file)

    with open(txt_l12_file, 'w') as txt_file:
        json.dump(txt_all_singular_values, txt_file)

    logger.info("wrote SVD spectra to %s and %s", img_l12_file, txt_l12_file)
    print("SVD Done:")





def cli_main():
    """Parse the command line and launch ``main`` via ``distributed_utils``.

    Extends the fairseq generation parser with three boolean switches
    (``--ema-eval``, ``--beam-search-vqa-eval``, ``--zero-shot``), converts
    the parsed namespace to an OmegaConf config and passes it, ``main`` and
    the three switch values (as keyword arguments) to
    ``distributed_utils.call_main``.
    """
    parser = options.get_generation_parser()

    # (flag, help text) for every extra store_true switch, in registration
    # order; a help text of None matches argparse's default.
    extra_switches = (
        ("--ema-eval", "Use EMA weights to make evaluation."),
        (
            "--beam-search-vqa-eval",
            "Use beam search for vqa evaluation (faster inference speed but sub-optimal result), if not specified, we compute scores for each answer in the candidate set, which is slower but can obtain best result.",
        ),
        ("--zero-shot", None),
    )
    for flag, help_text in extra_switches:
        parser.add_argument(flag, action='store_true', help=help_text)

    args = options.parse_args_and_arch(parser)
    cfg = convert_namespace_to_omegaconf(args)

    main_kwargs = {
        "ema_eval": args.ema_eval,
        "beam_search_vqa_eval": args.beam_search_vqa_eval,
        "zero_shot": args.zero_shot,
    }
    distributed_utils.call_main(cfg, main, **main_kwargs)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    cli_main()
