{
    "run": {
        "task": "retrieval",
        "lr_sched": "linear_warmup_cosine_lr",
        "init_lr": 1e-05,
        "min_lr": 0,
        "warmup_lr": 1e-08,
        "warmup_steps": 1000,
        "weight_decay": 0.05,
        "max_epoch": 5,
        "batch_size_train": 14,
        "batch_size_eval": 16,
        "lr_layer_decay": 0.95,
        "num_workers": 4,
        "accum_grad_iters": 1,
        "seed": 42,
        "output_dir": "output/BLIP2/Retrieval_coco",
        "amp": true,
        "resume_ckpt_path": null,
        "evaluate": false,
        "train_splits": [
            "train"
        ],
        "valid_splits": [
            "test"
        ],
        "k_test": 128,
        "device": "cuda",
        "world_size": 1,
        "dist_url": "env://",
        "distributed": false,
        "use_dist_eval_sampler": false
    },
    "model": {
        "arch": "blip2",
        "load_finetuned": false,
        "pretrained": "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_pretrained.pth",
        "finetuned": "https://storage.googleapis.com/sfr-vision-language-research/LAVIS/models/BLIP2/blip2_finetune_coco.pth",
        "image_size": 364,
        "drop_path_rate": 0,
        "use_grad_checkpoint": true,
        "vit_precision": "fp32",
        "freeze_vit": false,
        "num_query_token": 32,
        "model_type": "coco",
        "load_pretrained": true
    },
    "preprocess": {
        "vis_processor": {
            "train": {
                "name": "blip_image_train",
                "image_size": 364
            },
            "eval": {
                "name": "blip_image_eval",
                "image_size": 364
            }
        },
        "text_processor": {
            "train": {
                "name": "blip_caption"
            },
            "eval": {
                "name": "blip_caption"
            }
        }
    },
    "datasets": {
        "coco_retrieval": {
            "data_type": "images",
            "build_info": {
                "annotations": {
                    "train": {
                        "url": "https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_train.json",
                        "md5": "aa31ac474cf6250ebb81d18348a07ed8",
                        "storage": "coco/annotations/coco_karpathy_train.json"
                    },
                    "val": {
                        "url": "https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_val.json",
                        "md5": "b273847456ef5580e33713b1f7de52a0",
                        "storage": "coco/annotations/coco_karpathy_val.json"
                    },
                    "test": {
                        "url": "https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_test.json",
                        "md5": "3ff34b0ef2db02d01c37399f6a2a6cd1",
                        "storage": "coco/annotations/coco_karpathy_test.json"
                    }
                },
                "images": {
                    "storage": "/raid/temp/share/datasets/vision/coco/images/"
                }
            },
            "vis_processor": {
                "train": {
                    "name": "blip2_image_train",
                    "image_size": 364
                },
                "eval": {
                    "name": "blip_image_eval",
                    "image_size": 364
                }
            },
            "text_processor": {
                "train": {
                    "name": "blip_caption"
                },
                "eval": {
                    "name": "blip_caption"
                }
            }
        }
    }
}
