% Workshop paper, ACL Anthology ID 2024.c3nlp-1.4. The url field holds the
% canonical Anthology page for that ID; the doi field carries the same ID.
@inproceedings{wang-etal-2024-craft,
    title     = "{CRAFT}: Extracting and Tuning Cultural Instructions from the Wild",
    author    = "Wang, Bin and
                 Lin, Geyu and
                 Liu, Zhengyuan and
                 Wei, Chengwei and
                 Chen, Nancy",
    editor    = "Prabhakaran, Vinodkumar and
                 Dev, Sunipa and
                 Benotti, Luciana and
                 Hershcovich, Daniel and
                 Cabello, Laura and
                 Cao, Yong and
                 Adebara, Ife and
                 Zhou, Li",
    booktitle = "Proceedings of the 2nd Workshop on Cross-Cultural Considerations in NLP",
    month     = aug,
    year      = "2024",
    address   = "Bangkok, Thailand",
    publisher = "Association for Computational Linguistics",
    url       = "https://aclanthology.org/2024.c3nlp-1.4/",
    doi       = "10.18653/v1/2024.c3nlp-1.4",
    pages     = "42--47",
    abstract  = "Large language models (LLMs) have rapidly evolved as the foundation of various natural language processing (NLP) applications. Despite their wide use cases, their understanding of culturally-related concepts and reasoning remains limited. Meantime, there is a significant need to enhance these models' cultural reasoning capabilities, especially concerning underrepresented regions. This paper introduces a novel pipeline for extracting high-quality, culturally-related instruction tuning datasets from vast unstructured corpora. We utilize a self-instruction generation pipeline to identify cultural concepts and trigger instruction. By integrating with a general-purpose instruction tuning dataset, our model demonstrates enhanced capabilities in recognizing and understanding regional cultural nuances, thereby enhancing its reasoning capabilities. We conduct experiments across three regions: Singapore, the Philippines, and the United States, achieving performance improvement of up to 6{\%}. Our research opens new avenues for extracting cultural instruction tuning sets directly from unstructured data, setting a precedent for future innovations in the field."
}
Markdown (Informal)
[CRAFT: Extracting and Tuning Cultural Instructions from the Wild](https://aclanthology.org/2024.c3nlp-1.4/) (Wang et al., C3NLP 2024)
ACL