% Volume-level entry for the 2nd GenBench workshop proceedings (ID 2024.genbench-1.0).
% The url is the permanent ACL Anthology address for the volume.
@proceedings{genbench-2024-1,
    title = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.0/",
    doi = "10.18653/v1/2024.genbench-1.0"
}
% Workshop paper 2024.genbench-1.1 (pages 1--26).
% The url is the permanent ACL Anthology address for the paper.
@inproceedings{dubey-2024-evaluating,
    title = "Evaluating the fairness of task-adaptive pretraining on unlabeled test data before few-shot text classification",
    author = "Dubey, Kush",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.1/",
    doi = "10.18653/v1/2024.genbench-1.1",
    pages = "1--26"
}
% Workshop paper 2024.genbench-1.2 (pages 27--41).
% The url is the permanent ACL Anthology address; "LLMs" is braced as a whole
% word so styles that sentence-case titles keep its capitals.
@inproceedings{falkenstein-etal-2024-language,
    title = "From Language to Pixels: Task Recognition and Task Learning in {LLMs}",
    author = "Falkenstein, Janek  and
      Schuster, Carolin M.  and
      Berger, Alexander H.  and
      Groh, Georg",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.2/",
    doi = "10.18653/v1/2024.genbench-1.2",
    pages = "27--41"
}
% Workshop paper 2024.genbench-1.3 (pages 42--53).
% The url is the permanent ACL Anthology address; the benchmark name is braced
% as a whole word so its mixed capitalisation survives title recasing.
@inproceedings{bunzeck-zarriess-2024-slayqa,
    title = "The {SlayQA} benchmark of social reasoning: testing gender-inclusive generalization with neopronouns",
    author = "Bunzeck, Bastian  and
      Zarrie{\ss}, Sina",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.3/",
    doi = "10.18653/v1/2024.genbench-1.3",
    pages = "42--53"
}
% Workshop paper 2024.genbench-1.4 (pages 54--68).
% The url is the permanent ACL Anthology address; "LLMs" is braced as a whole
% word so styles that sentence-case titles keep its capitals.
@inproceedings{arcadinho-etal-2024-automated,
    title = "Automated test generation to evaluate tool-augmented {LLMs} as conversational {AI} agents",
    author = "Arcadinho, Samuel  and
      Aparicio, David Oliveira  and
      Almeida, Mariana S. C.",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.4/",
    doi = "10.18653/v1/2024.genbench-1.4",
    pages = "54--68"
}
% Workshop paper 2024.genbench-1.5 (pages 69--85).
% The url is the permanent ACL Anthology address; the hyphenated benchmark name
% is protected as one unit rather than as two separate brace groups.
@inproceedings{wang-etal-2024-mmlu,
    title = "{MMLU-SR}: A Benchmark for Stress-Testing Reasoning Capability of Large Language Models",
    author = "Wang, Wentian  and
      Jain, Sarthak  and
      Kantor, Paul  and
      Feldman, Jacob  and
      Gallos, Lazaros  and
      Wang, Hao",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.5/",
    doi = "10.18653/v1/2024.genbench-1.5",
    pages = "69--85"
}
% Workshop paper 2024.genbench-1.6 (pages 86--95).
% The url is the permanent ACL Anthology address; the benchmark name is braced
% as a whole word instead of protecting only its first two letters.
@inproceedings{bueno-etal-2024-mlissard,
    title = "{MLissard}: Multilingual Long and Simple Sequential Reasoning Benchmarks",
    author = "Bueno, Mirelle Candida  and
      Lotufo, Roberto  and
      Frassetto Nogueira, Rodrigo",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.6/",
    doi = "10.18653/v1/2024.genbench-1.6",
    pages = "86--95"
}
% Workshop paper 2024.genbench-1.7 (pages 96--119).
% The url is the permanent ACL Anthology address; the camel-case benchmark name
% is braced as a whole word instead of letter by letter.
@inproceedings{park-etal-2024-multiprageval,
    title = "{MultiPragEval}: Multilingual Pragmatic Evaluation of Large Language Models",
    author = "Park, Dojun  and
      Lee, Jiwoo  and
      Park, Seohyun  and
      Jeong, Hyeyun  and
      Koo, Youngeun  and
      Hwang, Soonha  and
      Park, Seonwoo  and
      Lee, Sungeun",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.7/",
    doi = "10.18653/v1/2024.genbench-1.7",
    pages = "96--119"
}
% Workshop paper 2024.genbench-1.8 (pages 120--130).
% The url is the permanent ACL Anthology address for the paper.
@inproceedings{arzt-hanbury-2024-beyond,
    title = "Beyond the Numbers: Transparency in Relation Extraction Benchmark Creation and Leaderboards",
    author = "Arzt, Varvara  and
      Hanbury, Allan",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.8/",
    doi = "10.18653/v1/2024.genbench-1.8",
    pages = "120--130"
}
% Workshop paper 2024.genbench-1.9 (pages 131--153).
% The url is the permanent ACL Anthology address; "LLMs" is braced as a whole
% word, and the apostrophe is written plainly since it needs no brace group.
@inproceedings{ross-etal-2024-artificial,
    title = "Is artificial intelligence still intelligence? {LLMs} generalize to novel adjective-noun pairs, but don't mimic the full human distribution",
    author = "Ross, Hayley  and
      Davidson, Kathryn  and
      Kim, Najoung",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.9/",
    doi = "10.18653/v1/2024.genbench-1.9",
    pages = "131--153"
}
% Workshop paper 2024.genbench-1.10 (pages 154--164).
% The url is the permanent ACL Anthology address for the paper.
@inproceedings{phatthiyaphaibun-etal-2024-chie,
    title = "{CHIE}: Generative {MRC} Evaluation for in-context {QA} with Correctness, Helpfulness, Irrelevancy, and Extraneousness Aspects",
    author = "Phatthiyaphaibun, Wannaphong  and
      Nonesung, Surapon  and
      Limkonchotiwat, Peerat  and
      Udomcharoenchaikit, Can  and
      Sawatphol, Jitkapat  and
      Chuangsuwanich, Ekapol  and
      Nutanong, Sarana",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.10/",
    doi = "10.18653/v1/2024.genbench-1.10",
    pages = "154--164"
}
% Workshop paper 2024.genbench-1.11 (pages 165--182).
% The url is the permanent ACL Anthology address; the last author's initials are
% space-separated so BibTeX treats them as separate given-name tokens.
@inproceedings{dutt-etal-2024-investigating,
    title = "Investigating the Generalizability of Pretrained Language Models across Multiple Dimensions: A Case Study of {NLI} and {MRC}",
    author = "Dutt, Ritam  and
      Ray Choudhury, Sagnik  and
      Rao, Varun Venkat  and
      Rose, Carolyn  and
      Vydiswaran, V. G. Vinod",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.11/",
    doi = "10.18653/v1/2024.genbench-1.11",
    pages = "165--182"
}
% Workshop paper 2024.genbench-1.12 (pages 183--195).
% The url is the permanent ACL Anthology address; the camel-case benchmark name
% is braced as a whole word instead of letter by letter.
@inproceedings{razzhigaev-etal-2024-omnidialog,
    title = "{OmniDialog}: A Multimodal Benchmark for Generalization Across Text, Visual, and Audio Modalities",
    author = "Razzhigaev, Anton  and
      Kurkin, Maxim  and
      Goncharova, Elizaveta  and
      Abdullaeva, Irina  and
      Lysenko, Anastasia  and
      Panchenko, Alexander  and
      Kuznetsov, Andrey  and
      Dimitrov, Denis",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.12/",
    doi = "10.18653/v1/2024.genbench-1.12",
    pages = "183--195"
}
% Workshop paper 2024.genbench-1.13 (pages 196--206).
% The url is the permanent ACL Anthology address for the paper.
@inproceedings{koufakou-etal-2024-towards,
    title = "Towards a new Benchmark for Emotion Detection in {NLP}: A Unifying Framework of Recent Corpora",
    author = "Koufakou, Anna  and
      Nieves, Elijah  and
      Peller, John",
    editor = "Hupkes, Dieuwke  and
      Dankers, Verna  and
      Batsuren, Khuyagbaatar  and
      Kazemnejad, Amirhossein  and
      Christodoulopoulos, Christos  and
      Giulianelli, Mario  and
      Cotterell, Ryan",
    booktitle = "Proceedings of the 2nd GenBench Workshop on Generalisation (Benchmarking) in NLP",
    month = nov,
    year = "2024",
    address = "Miami, Florida, USA",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.genbench-1.13/",
    doi = "10.18653/v1/2024.genbench-1.13",
    pages = "196--206"
}