@inproceedings{tu-etal-2025-rrinf,
title = "{RRI}nf: Efficient Influence Function Estimation via Ridge Regression for Large Language Models and Text-to-Image Diffusion Models",
author = "Tu, Zhuozhuo and
Chen, Cheng and
Du, Yuxuan",
editor = "Christodoulopoulos, Christos and
Chakraborty, Tanmoy and
Rose, Carolyn and
Peng, Violet",
booktitle = "Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing",
month = nov,
year = "2025",
address = "Suzhou, China",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/ingest-luhme/2025.emnlp-main.933/",
doi = "10.18653/v1/2025.emnlp-main.933",
pages = "18505--18518",
ISBN = "979-8-89176-332-6",
abstract = "The quality of data plays a vital role in the development of Large-scale Generative Models. Understanding how important a data point is for a generative model is essential for explaining its behavior and improving the performance. The influence function provides a framework for quantifying the impact of individual training data on model predictions. However, the high computational cost has hindered their applicability in large-scale applications. In this work, we present RRInf, a novel and principled method for estimating influence function in large-scale generative AI models. We show that influence function estimation can be transformed into a ridge regression problem. Based on this insight, we develop an algorithm that is efficient and scalable to large models. Experiments on noisy data detection and influential data identification tasks demonstrate that RRInf outperforms existing methods in terms of both efficiency and effectiveness for commonly used large models: RoBERTa-large, Llama-2-13B-chat, Llama-3-8B and stable-diffusion-v1.5."
}
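
The abstract's key claim is that influence function estimation can be recast as a ridge regression problem. The paper's own algorithm is not reproduced here; as an illustrative sketch only, the snippet below shows one standard route for such a reduction, assuming a damped Gauss-Newton Hessian built from stacked per-example gradients. The names `influence_via_ridge`, `G`, `g_test`, and `lam` are hypothetical and not taken from the paper.

```python
import numpy as np

def influence_via_ridge(G, g_test, lam=1e-3):
    """Illustrative sketch (not RRInf's algorithm): influence scores
    under a damped Gauss-Newton Hessian H ~ G.T @ G / n + lam * I,
    where G (n x p) stacks per-example training gradients and
    g_test (p,) is the gradient of the test loss.

    Classic influence of training example i (Koh & Liang, 2017):
        score_i = -g_i @ inv(H) @ g_test.
    By the Woodbury identity, the p x p solve reduces to the n x n
    regularized system (G @ G.T / n + lam * I) a = G @ g_test,
    which has the same form as the normal equations of kernel ridge
    regression on the gradient Gram matrix.
    """
    n = G.shape[0]
    K = G @ G.T / n                      # n x n gradient Gram matrix
    a = np.linalg.solve(K + lam * np.eye(n), G @ g_test)
    ihvp = (g_test - G.T @ a / n) / lam  # inv(H) @ g_test via Woodbury
    return -G @ ihvp                     # one influence score per example
```

The dual form is attractive because the linear solve is n x n rather than p x p, which matters when the parameter count p of a model such as Llama-3-8B dwarfs the number of examples n being scored; e.g., `influence_via_ridge(np.random.randn(8, 50), np.random.randn(50))` returns eight scores from a single 8 x 8 solve.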