@inproceedings{pei-etal-2024-enhanced,
title = "Enhanced {B}io{T}5+ for Molecule-Text Translation: A Three-Stage Approach with Data Distillation, Diverse Training, and Voting Ensemble",
author = "Pei, Qizhi and
Wu, Lijun and
Gao, Kaiyuan and
Zhu, Jinhua and
Yan, Rui",
editor = "Edwards, Carl and
Wang, Qingyun and
Li, Manling and
Zhao, Lawrence and
Hope, Tom and
Ji, Heng",
booktitle = "Proceedings of the 1st Workshop on Language + Molecules (L+M 2024)",
month = aug,
year = "2024",
address = "Bangkok, Thailand",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/fix-sig-urls/2024.langmol-1.6/",
doi = "10.18653/v1/2024.langmol-1.6",
pages = "48--54",
abstract = "This paper presents our enhanced BioT5+ method for the Language + Molecules shared task at the ACL 2024 Workshop. The task involves ``translating'' between molecules and natural language, including molecule captioning and text-based molecule generation using the \textit{L+M-24} dataset. Our method consists of three stages. In the first stage, we distill data from various models. In the second stage, combined with \textit{extra} version of the provided dataset, we train diverse models for subsequent voting ensemble.We also adopt Transductive Ensemble Learning (TEL) to enhance these base models. Lastly, all models are integrated using a voting ensemble method. Experimental results demonstrate that BioT5+ achieves superior performance on \textit{L+M-24} dataset. On the final leaderboard, our method (team name: \textbf{qizhipei}) ranks \textbf{first} in the text-based molecule generation task and \textbf{second} in the molecule captioning task, highlighting its efficacy and robustness in translating between molecules and natural language. The pre-trained BioT5+ models are available at \url{https://github.com/QizhiPei/BioT5}."
}