% ACL 2024 (Volume 1: Long Papers) paper, Anthology ID 2024.acl-long.744.
% The url field uses the canonical aclanthology.org landing page (same ID as
% the DOI suffix) rather than a preview/staging mirror.
@inproceedings{alzahrani-etal-2024-benchmarks,
    title     = {When Benchmarks are Targets: Revealing the Sensitivity of Large Language Model Leaderboards},
    author    = {Alzahrani, Norah and
                 Alyahya, Hisham and
                 Alnumay, Yazeed and
                 AlRashed, Sultan and
                 Alsubaie, Shaykhah and
                 Almushayqih, Yousef and
                 Mirza, Faisal and
                 Alotaibi, Nouf and
                 Al-Twairesh, Nora and
                 Alowisheq, Areeb and
                 Bari, M Saiful and
                 Khan, Haidar},
    editor    = {Ku, Lun-Wei and
                 Martins, Andre and
                 Srikumar, Vivek},
    booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
    month     = aug,
    year      = {2024},
    address   = {Bangkok, Thailand},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2024.acl-long.744/},
    doi       = {10.18653/v1/2024.acl-long.744},
    pages     = {13787--13805},
}