% EACL 2026 Industry Track paper; entry exported from the ACL Anthology.
@inproceedings{lima-veloso-2026-synthetic,
  title     = {Synthetic Data Fine-Tuning for Effective Team Formation in Enterprises},
  author    = {Lima, Guilherme Drummond and
               Veloso, Adriano},
  editor    = {Matusevych, Yevgen and
               Eryi{\u{g}}it, G{\"u}l{\c{s}}en and
               Aletras, Nikolaos},
  booktitle = {Proceedings of the 19th Conference of the {E}uropean Chapter of the {A}ssociation for {C}omputational {L}inguistics (Volume 5: Industry Track)},
  month     = mar,
  year      = {2026},
  address   = {Rabat, Morocco},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-eacl/2026.eacl-industry.46/},
  pages     = {598--609},
  isbn      = {979-8-89176-384-5},
  abstract  = {We evaluate the effectiveness of synthetic data fine-tuning for Semantic Search in a real-world Enterprise Team Formation problem scenario. In this problem, we aim to retrieve the best employee for a given task, given their information regarding abilities, experiences, and other aspects. We evaluate two synthetic data generation strategies: (1) augmenting real-world data with synthetic labels and (2) generating synthetic profiles for employees tailored to specific tasks. To measure the impact of these strategies, we fine-tune a pretrained text embedding model using LoRA and Rank Aggregation techniques. We evaluate the model performance against current SOTA algorithms on a human-curated dataset. Our experiments indicate that training a model that uses a combination of both Synthetic data generation strategies outperforms already established pre-trained models on the Team Formation task, improving the ranking metrics by an average of 30{\%} in comparison to the best-performing pre-trained model.},
}
Markdown (Informal)
[Synthetic Data Fine-Tuning for Effective Team Formation in Enterprises](https://preview.aclanthology.org/ingest-eacl/2026.eacl-industry.46/) (Lima & Veloso, EACL 2026)
ACL