% Conference-paper record for "EmCellLLM" (booktitle: BioNLP 2026).
% Words that must keep their capitalisation under sentence-casing styles are
% brace-protected as whole words rather than letter by letter.
% NOTE(review): the url field points at a preview/ingest branch of the hosting
% site; presumably it should be replaced by the permanent URL once the paper is
% published -- confirm before relying on it.
@inproceedings{guo-etal-2026-emcellllm,
  title     = {{EmCellLLM}: Human Peri-Implantation Embryonic Cell Annotation Based on Large Language Models},
  author    = {Guo, Xiaorui and
               Liu, Zhiwei and
               Xie, Qianqian and
               Ananiadou, Sophia},
  editor    = {Demner-Fushman, Dina and
               Ananiadou, Sophia and
               Roberts, Kirk and
               Tsujii, Junichi},
  booktitle = {{BioNLP} 2026},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-1.30/},
  pages     = {382--391},
  isbn      = {979-8-89176-434-7},
  abstract  = {The advent of single-cell RNA sequencing has enabled unprecedented resolution of cell fate decisions and regulatory mechanisms during peri-implantation human embryogenesis, in which accurate cell type annotation is a fundamental prerequisite and the first step for subsequent fate and mechanism inference. Large language models (LLMs) have demonstrated outstanding performance in various fields. However, current studies mostly rely on traditional methods and have not explored the application of LLMs in the field of human embryonic cell annotation. The main reason is the lack of instruction tuning datasets and evaluation benchmarks. In this paper, we proposed EmCellLLM, the first open sourced LLMs that are specialized for human embryonic cell type prediction task based on fine-tuning Qwen3-8B with EmCell4Instruction, the first embryonic cell type prediction instruction dataset. To support LLM instruction tuning, we also build EmCellBench, the first benchmark for evaluating human embryonic cell type prediction ability of LLMs. We compare our models with a variety of LLMs on EmCellBench, where our model outperforms all other open-sourced LLMs as well as DeepSeek.},
}
Markdown (Informal)
[EmCellLLM: Human Peri-Implantation Embryonic Cell Annotation Based on Large Language Models](https://preview.aclanthology.org/ingest-acl-workshops/2026.bionlp-1.30/) (Guo et al., BioNLP 2026)
ACL