

"""
For details on the configuration attributes used in this script, see:
https://huggingface.co/transformers/main_classes/configuration.html#transformers.PretrainedConfig

"""



import copy

from transformers import BertConfig, EncoderDecoderConfig, BertModel

#json_file_path = "./configs.json"
# Initializing a BERT bert-base-uncased style configuration
#config_encoder = BertConfig()
#config_decoder = BertConfig()


# Destination of the generated encoder-decoder configuration.
json_file_path = "./configs_cased.json"

# Load the pretrained checkpoint to obtain its configuration
# (a BERT bert-base-cased style configuration).
model = BertModel.from_pretrained('bert-base-cased')

# The encoder and the decoder need *independent* config objects.
# from_encoder_decoder_configs() marks the decoder config it receives as a
# decoder (is_decoder / add_cross_attention) before serializing both configs;
# if both names aliased model.config, those flags would leak into the encoder
# section of the saved JSON as well.
config_encoder = model.config
config_decoder = copy.deepcopy(model.config)

config = EncoderDecoderConfig.from_encoder_decoder_configs(config_encoder, config_decoder)


### Redefine key params
# Set the decoder config to causal LM with cross-attention over the encoder.
config.decoder.is_decoder = True
config.decoder.add_cross_attention = True

# Parameters for sequence generation.
# NOTE(review): these are stored on the decoder sub-config; generation may
# read its defaults from the top-level config instead -- confirm that these
# values actually take effect when generating.

# Whether or not to use sampling; use greedy decoding otherwise.
config.decoder.do_sample = True
# Whether to stop the beam search when at least ``num_beams`` sentences are
# finished per batch or not.
config.decoder.early_stopping = True
config.decoder.num_beams = 4
config.decoder.temperature = 1.0

# Other generation parameters are intentionally left untouched here:
# num_beam_groups, diversity_penalty, top_k, top_p, repetition_penalty,
# length_penalty, num_return_sequences, output_scores,
# return_dict_in_generate.

# Write the combined configuration to disk.
config.to_json_file(json_file_path)