{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T02:18:11Z","timestamp":1775787491758,"version":"3.50.1"},"reference-count":51,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T00:00:00Z","timestamp":1763251200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,11,16]],"date-time":"2025-11-16T00:00:00Z","timestamp":1763251200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1109\/ase63991.2025.00067","type":"proceedings-article","created":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T20:54:38Z","timestamp":1769633678000},"page":"739-751","source":"Crossref","is-referenced-by-count":2,"title":["Watson: A Cognitive Observability Framework for the Reasoning of LLM-Powered Agents"],"prefix":"10.1109","author":[{"given":"Benjamin","family":"Rombaut","sequence":"first","affiliation":[{"name":"Centre for Software Excellence,Huawei,Canada"}]},{"given":"Sogol","family":"Masoumzadeh","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence,Huawei,Canada"}]},{"given":"Kirill","family":"Vasilevski","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence,Huawei,Canada"}]},{"given":"Dayi","family":"Lin","sequence":"additional","affiliation":[{"name":"Centre for Software Excellence,Huawei,Canada"}]},{"given":"Ahmed E.","family":"Hassan","sequence":"additional","affiliation":[{"name":"Queen&#x2019;s University,Kingston,Canada"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Reasoning models don\u2019t always say what they think","volume-title":"Anthropic Technical Report","year":"2025"},{"key":"ref2","volume-title":"Phoenix by Arize","year":"2024"},{"key":"ref3","volume-title":"Chain-of-Thought Is Not Explainability","author":"Barez","year":"2025"},{"key":"ref4","volume-title":"Efficient Training of Language Models to Fill in the Middle","author":"Bavarian","year":"2022"},{"key":"ref5","volume-title":"SWE-bench Lite: A Canonical Subset for Efficient Evaluation of Language Models as Software Engineers","author":"Carlos","year":"2024"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3641289"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510163"},{"key":"ref8","volume-title":"Reasoning Models Don\u2019t Always Say What They Think","author":"Chen","year":"2025"},{"key":"ref9","volume-title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","author":"DeepSeek-AI","year":"2025"},{"key":"ref10","volume-title":"django Web Framework","year":"2024"},{"key":"ref11","volume-title":"AgentOps: Enabling Observability of LLM Agents","author":"Dong","year":"2024"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/aiware69974.2025.00027"},{"key":"ref13","volume-title":"Dynatrace","year":"2024"},{"key":"ref14","article-title":"InCoder: A Generative Model for Code Infilling and Synthesis","volume-title":"Proceedings of the 11th International Conference on Learning Representations","author":"Fried"},{"key":"ref15","volume-title":"GDB: The GNU Project Debugger"},{"key":"ref16","volume-title":"GitHub Copilot","year":"2024"},{"key":"ref17","article-title":"DS-Agent: Automated Data Science by Empowering Large Language Models with Case-Based Reasoning","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"Guo"},{"key":"ref18","volume-title":"Haptik, Drive Business Efficiency at Scale with Generative AI","year":"2024"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3663529.3663849"},{"key":"ref20","volume-title":"Towards AI-Native Software Engineering (SE 3.0): A Vision and a Challenge Roadmap","author":"E. Hassan","year":"2024"},{"key":"ref21","volume-title":"Helicone","year":"2024"},{"key":"ref22","article-title":"Measuring Massive Multitask Language Understanding","volume-title":"International Conference on Learning Representations","author":"Hendrycks"},{"key":"ref23","volume-title":"Automated Design of Agentic Systems","author":"Hu","year":"2024"},{"key":"ref24","volume-title":"A Survey on Hallucination in Large Language Models: Principles, Taxonomy, Challenges, and Open Questions","author":"Huang","year":"2023"},{"key":"ref25","volume-title":"Humanloop","year":"2024"},{"key":"ref26","volume-title":"Introducing Devin, the first AI software engineer","year":"2024"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639138"},{"key":"ref28","volume-title":"SWE-bench: Can Language Models Resolve Real-World GitHub Issues?","author":"Jimenez","year":"2024"},{"key":"ref29","volume-title":"LangSmith by LangChain","year":"2024"},{"key":"ref30","volume-title":"Langfuse","year":"2024"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-021-10063-9"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/tkde.2020.2983930"},{"key":"ref33","volume-title":"AI Transparency in the Age of LLMs: A Human-Centered Research Roadmap","author":"Liao","year":"2023"},{"key":"ref34","volume-title":"Mind Your Step (by Step): Chain-of-Thought can Reduce Performance on Tasks where Thinking Makes Humans Worse","author":"Liu","year":"2024"},{"key":"ref35","volume-title":"Lunary","year":"2024"},{"key":"ref36","volume-title":"The Landscape of Emerging AI Agent Architectures for Reasoning, Planning, and Tool Calling: A Survey","author":"Masterman","year":"2024"},{"key":"ref37","volume-title":"Nebuly - Explicit and Implicit LLM User Feedback Quickguide","year":"2024"},{"key":"ref38","volume-title":"Introducing OpenAI o1","year":"2024"},{"key":"ref39","volume-title":"Qwak - LLMops","year":"2024"},{"key":"ref40","article-title":"Reflexion: language agents with verbal reinforcement learning","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems. NIPS \u201923","author":"Shinn"},{"key":"ref41","article-title":"Dapper, a Large-Scale Distributed Systems Tracing Infrastructure","volume-title":"Tech. rep. Google, Inc.","author":"Sigelman","year":"2010"},{"key":"ref42","volume-title":"Dualformer: Controllable Fast and Slow Thinking by Learning with Randomized Reasoning Traces","author":"Su","year":"2025"},{"key":"ref43","volume-title":"Traceloop","year":"2024"},{"key":"ref44","volume-title":"OpenHands: An Open Platform for AI Software Developers as Generalist Agents","author":"Wang","year":"2025"},{"key":"ref45","volume-title":"Self-Consistency Improves Chain of Thought Reasoning in Language Models","author":"Wang","year":"2023"},{"key":"ref46","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems","author":"Wei"},{"key":"ref47","volume-title":"Weights and Biases - Weave","year":"2024"},{"key":"ref48","volume-title":"WhyLabs","year":"2024"},{"key":"ref49","volume-title":"Are Large Language Models Really Good Logical Reasoners? A Comprehensive Evaluation and Beyond","author":"Xu","year":"2024"},{"key":"ref50","article-title":"Tree of thoughts: deliberate problem solving with large language models","volume-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems","author":"Yao"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1145\/3650212.3680384"}],"event":{"name":"2025 40th IEEE\/ACM International Conference on Automated Software Engineering (ASE)","location":"Seoul, Korea, Republic of","start":{"date-parts":[[2025,11,16]]},"end":{"date-parts":[[2025,11,20]]}},"container-title":["2025 40th IEEE\/ACM International Conference on Automated Software Engineering (ASE)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11334056\/11334198\/11334632.pdf?arnumber=11334632","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,29]],"date-time":"2026-01-29T09:44:29Z","timestamp":1769679869000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11334632\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,16]]},"references-count":51,"URL":"https:\/\/doi.org\/10.1109\/ase63991.2025.00067","relation":{},"subject":[],"published":{"date-parts":[[2025,11,16]]}}}