# Run ngram_acc.py against the mmlu dataset using the
# meta-llama/Meta-Llama-3-8B-Instruct checkpoint on device cuda:5.
# NOTE(review): --n 5 is presumably the n-gram length and --model_name the
# label used for this run's outputs; confirm against ngram_acc.py.
python ngram_acc.py \
  --dataset_name mmlu \
  --model_path meta-llama/Meta-Llama-3-8B-Instruct \
  --model_name llama-3-8B-5-es \
  --device 'cuda:5' \
  --n 5 \
  --model_type chat

ARC

["What is the best way to model the effect of wind on sand dunes?\nChoices:['Pour a bucket a sand through the air.', 'Pour water over a pile of sand.', 'Filter sand through a screen into water.', 'Blow on a pile of sand through a straw.']\nOptions:['A', 'B', 'C', 'D']\nAnswer: D Blow on a pile of sand through a straw.\n\n"]

[" 以下哪種方法可以最好地判斷兩個人是否有血緣關係？\nChoices:['比較他們的血液型', '对比他们的笔迹。', '对比他们的基因。', '对比他们的指纹。']\nOptions:['A', 'B', 'C', 'D']\nAnswer: C 对比他们的基因。\n\n"]

" 1. Which is the most appropriate unit for expressing the distance that an airplane travels in 2 hours?\nChoices:['meter', 'kilometer','millimeter', 'centimeter']\nOptions:['A', 'B', 'C', 'D']\nAnswer: B kilometer\n\n"


mmlu

