% Chen, Varoquaux and Suchanek: paper in Findings of ACL: EACL 2024, pp. 983--994.
% The url field is the stable ACL Anthology permalink for paper ID
% 2024.findings-eacl.66 (not a temporary preview/ingest build).
% Abstract text is verbatim apart from spaces restored after sentence-final periods.
@inproceedings{chen-etal-2024-learning,
    title = "Learning High-Quality and General-Purpose Phrase Representations",
    author = "Chen, Lihu and
      Varoquaux, Gael and
      Suchanek, Fabian",
    editor = "Graham, Yvette and
      Purver, Matthew",
    booktitle = "Findings of the Association for Computational Linguistics: EACL 2024",
    month = mar,
    year = "2024",
    address = "St. Julian{'}s, Malta",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2024.findings-eacl.66/",
    pages = "983--994",
    abstract = "Phrase representations play an important role in data science and natural language processing, benefiting various tasks like Entity Alignment, Record Linkage, Fuzzy Joins, and Paraphrase Classification. The current state-of-the-art method involves fine-tuning pre-trained language models for phrasal embeddings using contrastive learning. However, we have identified areas for improvement. First, these pre-trained models tend to be unnecessarily complex and require to be pre-trained on a corpus with context sentences. Second, leveraging the phrase type and morphology gives phrase representations that are both more precise and more flexible. We propose an improved framework to learn phrase representations in a context-free fashion. The framework employs phrase type classification as an auxiliary task and incorporates character-level information more effectively into the phrase representation. Furthermore, we design three granularities of data augmentation to increase the diversity of training samples. Our experiments across a wide range of tasks reveal that our approach generates superior phrase embeddings compared to previous methods while requiring a smaller model size."
}
Markdown (Informal)
[Learning High-Quality and General-Purpose Phrase Representations](https://aclanthology.org/2024.findings-eacl.66/) (Chen et al., Findings 2024)
ACL