@inproceedings{lindenbauer-etal-2025-knowledge,
title = "From Knowledge to Noise: {CTIM}-Rover and the Pitfalls of Episodic Memory in Software Engineering Agents",
author = "Lindenbauer, Tobias and
Groh, Georg and
Schuetze, Hinrich",
editor = "Kamalloo, Ehsan and
Gontier, Nicolas and
Lu, Xing Han and
Dziri, Nouha and
Murty, Shikhar and
Lacoste, Alexandre",
booktitle = "Proceedings of the 1st Workshop for Research on Agent Language Models (REALM 2025)",
month = jul,
year = "2025",
address = "Vienna, Austria",
publisher = "Association for Computational Linguistics",
url = "https://preview.aclanthology.org/mtsummit-25-ingestion/2025.realm-1.30/",
doi = "10.18653/v1/2025.realm-1.30",
pages = "411--427",
ISBN = "979-8-89176-264-0",
abstract = "We introduce CTIM-Rover, an AI agent for Software Engineering (SE) built on top of AutoCodeRover (Zhang et al., 2024) that extends agentic reasoning frameworks with an episodic memory, more specifically, a general and repository-level Cross-Task-Instance Memory (CTIM). While existing open-source SE agents mostly rely on ReAct (Yao et al., 2023b), Reflexion (Shinn et al., 2023), or Code-Act (Wang et al., 2024), all of these reasoning and planning frameworks inefficiently discard their long-term memory after a single task instance. As repository-level understanding is pivotal for identifying all locations requiring a patch for fixing a bug, we hypothesize that SE is particularly well positioned to benefit from CTIM. For this, we build on the Experiential Learning (EL) approach ExpeL (Zhao et al., 2024), proposing a Mixture-Of-Experts (MoEs) inspired approach to create both a general-purpose and repository-level CTIM . We find that CTIM-Rover does not outperform AutoCodeRover in any configuration and thus conclude that neither ExpeL nor DoT-Bank (Lingam et al., 2024) scale to real-world SE problems. Our analysis indicates noise introduced by distracting CTIM items or exemplar trajectories as the likely source of the performance degradation."
}
Markdown (Informal)
[From Knowledge to Noise: CTIM-Rover and the Pitfalls of Episodic Memory in Software Engineering Agents](https://preview.aclanthology.org/mtsummit-25-ingestion/2025.realm-1.30/) (Lindenbauer et al., REALM 2025)
ACL