% Conference paper from the EMNLP 2025 Industry Track proceedings.
% The url field uses the canonical ACL Anthology address for paper ID
% 2025.emnlp-industry.155 rather than the transient ingestion-preview host.
% NOTE(review): the abstract ends with "The code can be found here." -- the
% link target was not present in the scraped record; confirm and add if known.
@inproceedings{constantinides-etal-2025-generalized,
  author    = {Constantinides, Christodoulos and
               Lin, Shuxin and
               Patel, Dhaval C},
  title     = {Generalized Embedding Models for {Industry} 4.0 Applications},
  editor    = {Potdar, Saloni and
               Rojas-Barahona, Lina and
               Montella, Sebastien},
  booktitle = {Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track},
  month     = nov,
  year      = {2025},
  address   = {Suzhou, China},
  publisher = {Association for Computational Linguistics},
  pages     = {2234--2251},
  isbn      = {979-8-89176-333-3},
  url       = {https://aclanthology.org/2025.emnlp-industry.155/},
  abstract  = {In this work, we present the first embedding model specifically designed for Industry 4.0 applications, targeting the semantics of industrial asset operations. Given natural language tasks related to specific assets, our model retrieves relevant items and generalizes to queries involving similar assets, such as identifying sensors relevant to an asset's failure mode. We systematically construct nine asset-specific datasets using an expert-validated knowledge base reflecting real operational scenarios. To ensure contextually rich embeddings, we augment queries with Large Language Models, generating concise entity descriptions that capture domain-specific nuances. Across five embedding models ranging from BERT (110M) to gte-Qwen (7B), we observe substantial in-domain gains: HIT@1 +54.2\%, MAP@100 +50.1\%, NDCG@10 +54.7\% on average. Ablation studies reveal that (a) LLM-based query augmentation significantly improves embedding quality; (b) contrastive objectives without in-batch negatives are more effective for tasks with many relevant items; and (c) balancing positives and negatives in batches is essential. We evaluate on a new task and finally present a case study wrapping them as tools and providing them to a planning agent. The code can be found here.},
}
Markdown (Informal)
[Generalized Embedding Models for Industry 4.0 Applications](https://preview.aclanthology.org/ingest-emnlp/2025.emnlp-industry.155/) (Constantinides et al., EMNLP 2025)
ACL
- Christodoulos Constantinides, Shuxin Lin, and Dhaval C Patel. 2025. Generalized Embedding Models for Industry 4.0 Applications. In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing: Industry Track, pages 2234–2251, Suzhou (China). Association for Computational Linguistics.