@comment{
  Conference paper entry for PAM (Xu et al.), ACL 2026, Volume 1: Long Papers.
  NOTE(review): the url field points at an aclanthology.org preview
  (ingest-acl) address, and the abstract links an anonymous code repository;
  confirm whether permanent links should replace them.
}
@inproceedings{xu-etal-2026-pam,
  title     = {{PAM}: Enhancing General Alignment of Large Reasoning Models through Priority-Aware Metacognition},
  author    = {Xu, Zhihao and
               Yang, Fuzhen and
               Lin, Liang and
               Wang, Xiting},
  editor    = {Liakata, Maria and
               Moreira, Viviane P. and
               Zhang, Jiajun and
               Jurgens, David},
  booktitle = {Proceedings of the 64th Annual Meeting of the {Association} for {Computational} {Linguistics} (Volume 1: Long Papers)},
  month     = jul,
  year      = {2026},
  address   = {San Diego, California, United States},
  publisher = {Association for Computational Linguistics},
  url       = {https://preview.aclanthology.org/ingest-acl/2026.acl-long.432/},
  pages     = {9554--9573},
  isbn      = {979-8-89176-390-6},
  abstract  = {Recent advancements in Large Reasoning Models (LRMs) have showcased strong performance across various reasoning tasks by leveraging System-2 thinking capabilities. However, existing studies indicate that this reasoning ability alone does not reliably transfer to the general alignment domain. Inspired by cognitive science and how humans solve tasks, we argue that LRMs must be equipped with metacognitive knowledge to fully utilize their System-2 capabilities. In this paper, we propose Priority-Aware Metacognition (PAM), which guides the model to first identify the top-level human preference (e.g., harmlessness) as a means of understanding the alignment task{'}s nature, and then apply other kinds of metacognitive knowledge to better monitor and regulate the model{'}s thinking process. We implement PAM via a two-stage pipeline: a cold-start phase that collects structured metacognitive knowledge based on Flavell{'}s theoretical framework, and a preference-optimization phase that further reinforces such metacognition. Extensive experiments validate the effectiveness of PAM. Under the same training pipelines, PAM consistently yields higher performance, improving general domain alignment performance by {\textasciitilde}10 points on the helpfulness and harmless benchmarks. Code is available at https://anonymous.4open.science/r/PAM-RM-02DF.},
}
Markdown (Informal)
[PAM: Enhancing General Alignment of Large Reasoning Models through Priority-Aware Metacognition](https://preview.aclanthology.org/ingest-acl/2026.acl-long.432/) (Xu et al., ACL 2026)
ACL