@inproceedings{xu-etal-2025-team,
    title = "Team {XSZ} at {B}io{L}ay{S}umm2025: Section-Wise Summarization, Retrieval-Augmented {LLM}, and Reinforcement Learning Fine-Tuning for Lay Summaries",
    author = "Xu, Pengcheng  and
      Shen, Sicheng  and
      Zhou, Jieli  and
      Xin, Hongyi",
    editor = "Soni, Sarvesh  and
      Demner-Fushman, Dina",
    booktitle = "Proceedings of the 24th Workshop on Biomedical Language Processing (Shared Tasks)",
    month = aug,
    year = "2025",
    address = "Vienna, Austria",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2025.bionlp-share.33/",
    doi = "10.18653/v1/2025.bionlp-share.33",
    pages = "275--280",
    isbn = "979-8-89176-276-3",
    abstract = "We propose a unified, multi-stage lay summarization pipeline for BioLaySumm 2025 (Subtask 1.1) that (1) selects and summarizes key article sections via BioBART, (2) retrieves K-shot demonstrations using BGE embeddings for in-context Llama 3 8B prompting, (3) applies LoRA adapters to Llama 3 8B for supervised fine-tuning, (4) merges section summaries with a second BioBART pass, and (5) refines outputs through reinforcement learning (PPO {\&} GRPO) using a composite reward of factuality (AlignScore, SummaC), relevance (ROUGE-L, BERTScore), and readability (LENS, FKGL, DCRS, CLI). On PLOS and eLife validation sets, our complete system reduces DCRS from 9.23 to 8.56 and reduces CLI from 12.98 to 12.65, ranking 3rd in readability, and outperforms llama3 finetune baseline in AlignScore 0.722 to 0.862, ranking 5th in factuality, demonstrating balanced gains across readability, relevance, and factuality."
}