{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T12:18:31Z","timestamp":1779365911246,"version":"3.53.0"},"publisher-location":"New York, NY, USA","reference-count":35,"publisher":"ACM","license":[{"start":{"date-parts":[[2026,4,12]],"date-time":"2026-04-12T00:00:00Z","timestamp":1775952000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/legalcode"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2026,4,12]]},"DOI":"10.1145\/3786167.3788427","type":"proceedings-article","created":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T11:40:19Z","timestamp":1779363619000},"page":"96-103","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["AgentFixer: From Failure Detection to Fix Recommendations in Agentic Systems"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-8614-0739","authenticated-orcid":false,"given":"Hadar","family":"Mulian","sequence":"first","affiliation":[{"name":"IBM Research, Haifa, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2540-1604","authenticated-orcid":false,"given":"Sergey","family":"Zeltyn","sequence":"additional","affiliation":[{"name":"IBM Research, Haifa, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7400-3452","authenticated-orcid":false,"given":"Ido","family":"Levy","sequence":"additional","affiliation":[{"name":"IBM Research, Haifa, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-3808-016X","authenticated-orcid":false,"given":"Liane","family":"Galanti","sequence":"additional","affiliation":[{"name":"Princeton University, Princeton, New Jersey, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4698-1927","authenticated-orcid":false,"given":"Avi","family":"Yaeli","sequence":"additional","affiliation":[{"name":"IBM Research, Haifa, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1216-8284","authenticated-orcid":false,"given":"Segev","family":"Shlomov","sequence":"additional","affiliation":[{"name":"IBM Research, Haifa, Israel"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2026,5,21]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Arize AI. 2024. Phoenix: The Open-Source ML Observability and Evaluation Platform. https:\/\/docs.arize.com\/phoenix\/ Documentation hub; Accessed 2025-09-22."},{"key":"e_1_3_3_1_3_2","unstructured":"Mert Cemri Melissa\u00a0Z. Pan Shuyi Yang and Lakshya A\u00a0Agrawal et al.2025. Why Do Multi-Agent LLM Systems Fail? ArXiv abs\/2503.13657 (2025)."},{"key":"e_1_3_3_1_4_2","unstructured":"CrewAI. 2025. Evaluating Use Cases (CrewAI Documentation). https:\/\/docs.crewai.com\/guides\/concepts\/evaluating-use-cases Accessed 2025-09-22."},{"key":"e_1_3_3_1_5_2","unstructured":"Darshan Deshpande Varun Gangal Hersh Mehta Jitin Krishnan Anand Kannappan and Rebecca Qian. 2025. TRAIL: Trace Reasoning and Agentic Issue Localization. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.08638 (2025)."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-demo.8"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1128"},{"key":"e_1_3_3_1_8_2","unstructured":"Jiawei Gu Xuhui Jiang Zhichao Shi and Hexiang et\u00a0al. Tan. 2024. A survey on llm-as-a-judge. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2411.15594 (2024)."},{"key":"e_1_3_3_1_9_2","unstructured":"Guardrails AI. 2025. Generate Structured Data & Use Validators for Structured Data Validation. https:\/\/guardrailsai.com\/docs\/how_to_guides\/generate_structured_data. Validators and guards for input\/output policy checks."},{"key":"e_1_3_3_1_10_2","unstructured":"Junda He Christoph Treude and David Lo. 2025. LLM-Based Multi-Agent Systems for Software Engineering: Literature Review Vision and the Road Ahead. ACM Transactions on Software Engineering and Methodology (2025)."},{"key":"e_1_3_3_1_11_2","unstructured":"Jen-tse Huang Jiaxu Zhou Tailin Jin and Xuhui et\u00a0al. Zhou. 2024. On the resilience of llm-based multi-agent collaboration with faulty agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.00989 (2024)."},{"key":"e_1_3_3_1_12_2","unstructured":"Hugging Face. 2024. Structured Outputs (Inference Providers). https:\/\/huggingface.co\/docs\/inference-providers\/en\/guides\/structured-output Accessed 2025-09-22."},{"key":"e_1_3_3_1_13_2","unstructured":"LangChain. 2024. LangSmith: LLM Application Observability and Testing Platform. https:\/\/www.langchain.com\/langsmith. Accessed: 2024-12-31."},{"key":"e_1_3_3_1_14_2","unstructured":"LangChain. 2025. How to Think About Agent Frameworks. Blog post. https:\/\/blog.langchain.com\/how-to-think-about-agent-frameworks\/ Accessed 2025-09-22."},{"key":"e_1_3_3_1_15_2","unstructured":"Langfuse. 2024. AI Agent Observability with Langfuse. Blog post. https:\/\/langfuse.com\/blog\/2024-07-ai-agent-observability-with-langfuse Accessed 2025-09-22."},{"key":"e_1_3_3_1_16_2","volume-title":"The Twelfth International Conference on Learning Representations","author":"Liu Xiao","year":"2023","unstructured":"Xiao Liu, Hao Yu, Hanchen Zhang, Yifan Xu, and Lei et al.2023. AgentBench: Evaluating LLMs as Agents. In The Twelfth International Conference on Learning Representations."},{"key":"e_1_3_3_1_17_2","unstructured":"Sami Marreed Alon Oved Avi Yaeli Segev Shlomov Ido Levy Offer Akrabi Aviad Sela Asaf Adi and Nir Mashkif. 2025. Towards enterprise-ready computer using generalist agent. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2503.01861 (2025)."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3711896.3736570"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1109\/ASE63991.2025.00327"},{"key":"e_1_3_3_1_20_2","unstructured":"OpenAI. 2023. openai\/evals: A framework for evaluating LLMs and LLM applications. https:\/\/github.com\/openai\/evals. GitHub repository."},{"key":"e_1_3_3_1_21_2","unstructured":"OpenAI. 2024. Structured Outputs. https:\/\/platform.openai.com\/docs\/guides\/structured-outputs Developer documentation; Accessed 2025-09-22."},{"key":"e_1_3_3_1_22_2","unstructured":"OpenTelemetry Project. 2025. AI Agent Observability (OpenTelemetry Blog). https:\/\/opentelemetry.io\/blog\/2025\/ai-agent-observability\/ Accessed 2025-09-22."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v39i28.35153"},{"key":"e_1_3_3_1_24_2","unstructured":"Pydantic. 2024. PydanticAI: Output validation. https:\/\/ai.pydantic.dev\/output\/ Accessed 2025-09-22."},{"key":"e_1_3_3_1_25_2","volume-title":"Proceedings of ACL","author":"Schick Timo","year":"2024","unstructured":"Timo Schick and Hinrich Sch\u00fctze. 2024. JSONformer: Correct-by-Construction JSON Generation with Language Models. In Proceedings of ACL."},{"key":"e_1_3_3_1_26_2","unstructured":"Sivan Schwartz Avi Yaeli and Segev Shlomov. 2023. Enhancing trust in LLM-based AI automation agents: New considerations and future challenges. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.05391 (2023)."},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3654777.3676450"},{"key":"e_1_3_3_1_28_2","unstructured":"Lin Shi Chiyu Ma Wenhua Liang and Xingjian et\u00a0al. Diao. 2024. Judging the judges: A systematic study of position bias in llm-as-a-judge. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2406.07791 (2024)."},{"key":"e_1_3_3_1_29_2","unstructured":"Segev Shlomov Alon Oved Sami Marreed and Ido\u00a0Levy et al.2025. From Benchmarks to Business Impact: Deploying IBM Generalist Agent in Enterprise Production. arxiv:https:\/\/arXiv.org\/abs\/2510.23856\u00a0[cs.AI] https:\/\/arxiv.org\/abs\/2510.23856"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"crossref","unstructured":"Segev Shlomov Aviad Sela Ido Levy Liane Galanti Roy Abitbol et\u00a0al. 2024. From grounding to planning: Benchmarking bottlenecks in web agents. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.01927 (2024).","DOI":"10.3233\/FAIA251390"},{"key":"e_1_3_3_1_31_2","volume-title":"Proceedings of EMNLP","author":"Tam Kevin","year":"2024","unstructured":"Kevin Tam, Elena Rossi, and Soojin Park. 2024. Robust Structured Text Generation via Validation and Self-Repair. In Proceedings of EMNLP."},{"key":"e_1_3_3_1_32_2","unstructured":"Darren Yow-Bang Wang Zhengyuan Shen Soumya\u00a0Smruti Mishra Zhichao Xu Yifei Teng and Haibo Ding. 2025. SLOT: Structuring the Output of Large Language Models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2505.04016 (2025)."},{"key":"e_1_3_3_1_33_2","unstructured":"Sergey Zeltyn Segev Shlomov Avi Yaeli and Alon Oved. 2022. Prescriptive process monitoring in intelligent process automation with chatbot orchestration. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.06564 (2022)."},{"key":"e_1_3_3_1_34_2","volume-title":"Proceedings of NAACL","author":"Zhang Liwei","year":"2025","unstructured":"Liwei Zhang, Yuxin Han, and Ananya Gupta. 2025. Schema-Adherent Generation with Reinforcement Learning. In Proceedings of NAACL."},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"crossref","unstructured":"Lianmin Zheng Wei-Lin Chiang Ying Sheng and Siyuan et\u00a0al. Zhuang. 2023. Judging llm-as-a-judge with mt-bench and chatbot arena. Advances in neural information processing systems 36 (2023) 46595\u201346623.","DOI":"10.52202\/075280-2020"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"crossref","unstructured":"Lianmin Zheng Wei-Lin Chiang Ying Sheng and Siyuan et\u00a0al. Zhuang. 2023. Judging llm-as-a-judge with mt-bench and chatbot arena. Advances in neural information processing systems 36 (2023) 46595\u201346623.","DOI":"10.52202\/075280-2020"}],"event":{"name":"AGENT '26: International Workshop on Agentic Engineering","location":"Rio de Janeiro Brazil","acronym":"AGENT '26","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS","Faculty of Engineering of University of Porto"]},"container-title":["Proceedings of the 2026 International Workshop on Agentic Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3786167.3788427","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T12:03:29Z","timestamp":1779365009000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3786167.3788427"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,4,12]]},"references-count":35,"alternative-id":["10.1145\/3786167.3788427","10.1145\/3786167"],"URL":"https:\/\/doi.org\/10.1145\/3786167.3788427","relation":{},"subject":[],"published":{"date-parts":[[2026,4,12]]},"assertion":[{"value":"2026-05-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}