# Hyperparameters for BERTForRationaleGeneration 

# NOTE: values are written as plain YAML scalars with no trailing commas.
# In a block mapping a trailing comma is part of the scalar itself, so
# `16,` would load as the string "16," and `false,` as the (truthy)
# string "false," instead of an integer / boolean.

# Batch sizes.
train_batch_size: 16
eval_batch_size: 8
bert_batch_size: 500

# Optimisation schedule.
# Written with an explicit decimal point: YAML 1.1 parsers (e.g. PyYAML)
# resolve `5e-5` as a string, whereas `5.0e-5` is a float everywhere.
learning_rate: 5.0e-5
num_train_epochs: 15

# NOTE(review): the names below look like convolution-layer settings;
# confirm against the model code.
in_channels: 1
out_channels: 300
kernel_height: 4
stride: 1
padding: 0

dropout: 0.1

# Input size limits.
max_num_sentence: 1000
max_seq_length: 25

# NOTE(review): presumably the sizes of a transformer-style encoder
# (layers, attention heads, feed-forward width); verify with the consumer.
n_layers: 2
n_heads: 4
ff_dim: 600

do_lower_case: false
warmup_proportion: 0.1
gradient_accumulation_steps: 1

# Mixed-precision options.
fp16: false
loss_scale: 0
