@comment{
  Conference-paper record for the index-time prefix injection paper
  (Varshney and MC, 2026).
  NOTE(review): the url field uses the preview.aclanthology.org/ingest-acl
  path, which looks like a staging location; confirm the permanent URL.
  NOTE(review): the address field holds San Diego, California, USA, which is
  presumably the conference venue rather than the publisher city; confirm
  against the target bibliography style.
}
@inproceedings{varshney-mc-2026-index,
  title     = {Index-Time Prefix Injection for Multi-Tenant Retrieval: Improving Search Relevance Without Model Fine-Tuning},
  author    = {Varshney, Vaibhav and
               MC, Manjunatha Naik},
  editor    = {Li, Yunyao and
               Rehm, Georg and
               Tu, Mei},
  booktitle = {Proceedings of the 64th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics ({ACL} 2026)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, USA},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-industry.149/},
  pages     = {2231--2240},
  isbn      = {979-8-89176-394-4},
  abstract  = {Multi-tenant enterprise search platforms serve hundreds of customers through a single shared retrieval model. Fine-tuning on individual customer data is typically prohibited by contractual and regulatory constraints, and maintaining per-customer models does not scale. We present index-time prefix injection, a training-free method that improves retrieval relevance by prepending domain-descriptive natural-language prefixes to documents during indexing. For example, prepending ``IT service management knowledge article:'' to an IT knowledge base shifts its embeddings into a tighter, more domain-coherent region of the vector space. Prefixes are discovered through a tiered strategy: LLM-based generation from document samples when data policies allow, domain-expert curation when they do not, and a standardized prefix library as fallback. Deployed across 18 languages and 400+ customer instances, the approach yields 3{--}8{\%} Hit@5 improvements with zero model training. A/B tests confirm a 4.2{\%} CTR lift. We describe the system design, evaluation at scale, and deployment lessons including failure modes.},
}
Markdown (Informal)
[Index-Time Prefix Injection for Multi-Tenant Retrieval: Improving Search Relevance Without Model Fine-Tuning](https://preview.aclanthology.org/ingest-acl/2026.acl-industry.149/) (Varshney & MC, ACL 2026)
ACL