# Load the model and tokenizer directly, then save both to a local directory
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Checkpoint identifier shared by the tokenizer and model loading calls below.
checkpoint = "kd-shared/results_dpo_zephyr"

# The empty-string key places the entire model on a single device: GPU 0.
device_map = {"": 0}

tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Request bfloat16 weights, placed according to the device map above.
load_kwargs = dict(device_map=device_map, torch_dtype=torch.bfloat16)
model = AutoModelForCausalLM.from_pretrained(checkpoint, **load_kwargs)

# Save the model first, then the tokenizer, into the same output directory.
merged_dir = "results_7b_zephyr_dpo_konstantin/final_merged_checkpoint"
for artifact in (model, tokenizer):
    artifact.save_pretrained(merged_dir)