{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T21:35:40Z","timestamp":1774042540151,"version":"3.50.1"},"reference-count":81,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T00:00:00Z","timestamp":1765152000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,8]],"date-time":"2025-12-08T00:00:00Z","timestamp":1765152000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,8]]},"DOI":"10.1109\/bigdata66926.2025.11401679","type":"proceedings-article","created":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T20:57:57Z","timestamp":1772830677000},"page":"1847-1856","source":"Crossref","is-referenced-by-count":0,"title":["Automated Structural Testing of LLM-Based Agents: Methods, Framework, and Case Studies"],"prefix":"10.1109","author":[{"given":"Jens","family":"Kohl","sequence":"first","affiliation":[{"name":"BMW Group,Munich,Germany"}]},{"given":"Otto","family":"Kruse","sequence":"additional","affiliation":[{"name":"Amazon Web Services"}]},{"given":"Youssef","family":"Mostafa","sequence":"additional","affiliation":[{"name":"BMW Group,Munich,Germany"}]},{"given":"Andre","family":"Luckow","sequence":"additional","affiliation":[{"name":"BMW Group,Munich,Germany"}]},{"given":"Karsten","family":"Schroer","sequence":"additional","affiliation":[{"name":"Amazon Web Services"}]},{"given":"Thomas","family":"Riedl","sequence":"additional","affiliation":[{"name":"BMW Group,Munich,Germany"}]},{"given":"Ryan","family":"French","sequence":"additional","affiliation":[{"name":"Amazon Web Services"}]},{"given":"David","family":"Katz","sequence":"additional","affiliation":[{"name":"BMW Group,Munich,Germany"}]},{"given":"Manuel P.","family":"Luitz","sequence":"additional","affiliation":[{"name":"BMW Group,Munich,Germany"}]},{"given":"Tanrajbir","family":"Takher","sequence":"additional","affiliation":[{"name":"Amazon Web Services"}]},{"given":"Ken E.","family":"Friedl","sequence":"additional","affiliation":[{"name":"BMW Group,Munich,Germany"}]},{"given":"C\u00e9line","family":"Laurent-Winter","sequence":"additional","affiliation":[{"name":"BMW Group,Munich,Germany"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Why agents are the next frontier of generative AI","volume-title":"McKinsey Digital Practice","author":"Yee","year":"2024"},{"key":"ref2","volume-title":"Top technology trends 2025","year":"2024"},{"key":"ref3","volume-title":"From LLMs to LLM-based agents for software engineering: a survey of current, challenges and future","author":"Jin","year":"2024"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/102792.102801"},{"key":"ref5","volume-title":"Designing data-intensive applications: the big ideas behind reliable, scalable, and maintainable systems","author":"Kleppmann","year":"2017"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/3571730"},{"key":"ref7","volume-title":"Airline held liable for its chatbot giving passenger bad advice - what this means for travellers","author":"Yagoda","year":"2025"},{"key":"ref8","volume-title":"Universal and transferable adversarial attacks on aligned language models","author":"Zou","year":"2023"},{"key":"ref9","author":"Dickson","year":"2025","journal-title":"Jailbreaking Grok-4: how a \u2019one-two punch\u2019 attack bypasses the world\u2019s \u2019smartest\u2019 AI"},{"key":"ref10","volume-title":"LLMs get lost in multi-turn conversation","author":"Laban","year":"2025"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1017\/9781316771273"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2022.111549"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.52202\/075280-2020"},{"key":"ref14","volume-title":"A survey on LLM-as-a-judge","author":"Gu","year":"2024"},{"key":"ref15","volume-title":"One token to fool LLM-as-a-judge","author":"Zhao","year":"2025"},{"key":"ref16","first-page":"52","article-title":"Can large language models truly understand prompts? A case study with negated prompts","volume-title":"Proceedings of the 1st Transfer Learning for Natural Language Processing Workshop","author":"Jang","year":"2023"},{"key":"ref17","volume-title":"Large language model based multi-agents: a survey of progress and challenges","author":"Guo","year":"2024"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s11432-024-4222-0"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3744746"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/s11704-024-40231-1"},{"key":"ref21","article-title":"React: synergizing reasoning and acting in language models","volume-title":"11th International Conference on Learning Representations","author":"Yao","year":"2023"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3442188.3445922"},{"key":"ref23","volume-title":"Cognitive mirage: a review of hallucinations in large language models","author":"Ye","year":"2023"},{"key":"ref24","volume-title":"A comprehensive survey of hallucination mitigation techniques in large language models","author":"Tonmoy","year":"2024"},{"key":"ref25","volume-title":"Hallucination is inevitable: an innate limitation of large language models","author":"Xu","year":"2024"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-99965-9_39"},{"key":"ref27","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2024.findings-acl.443","volume-title":"A comprehensive study of jailbreak attack versus defense for large language models","author":"Xu","year":"2024"},{"key":"ref28","article-title":"Open Worldwide Application Security Project (OWASP)","volume-title":"OWASP Top 10 for LLM applications","year":"2025"},{"key":"ref29","article-title":"The MITRE Corporation","volume-title":"ATT&CK Matrix for enterprise","year":"2025"},{"key":"ref30","volume-title":"Why do multi-agent LLM systems fail?","author":"Cemri","year":"2025"},{"key":"ref31","volume-title":"Defining and detecting the defects of the large language model-based autonomous agents","author":"Ning","year":"2024"},{"key":"ref32","volume-title":"Evil geniuses: delving into the safety of LLM-based agents","author":"Tian","year":"2023"},{"key":"ref33","article-title":"Software engineering: R&D trends and defense needs","author":"Boehm","year":"1979","journal-title":"Research directions in software technology"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3641289"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE-FoSE59343.2023.00009"},{"key":"ref36","volume-title":"Survey on evaluation of LLM-based agents","author":"Yehudai","year":"2025"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3703412.3703439"},{"key":"ref38","volume-title":"TrustAgent: towards safe and trustworthy LLM-based agents","author":"Hua","year":"2024"},{"key":"ref39","volume-title":"Generative AI Toolkit - a framework for increasing the quality of LLM-based applications over their whole life cycle","author":"Kohl","year":"2024"},{"key":"ref40","volume-title":"The art of software testing","author":"Myers","year":"1979"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/355604.361591"},{"key":"ref42","volume-title":"Software testing techniques","author":"Beizer","year":"1990"},{"key":"ref43","volume-title":"Software engineering","author":"Sommerville","year":"2015"},{"key":"ref44","volume-title":"A handbook of software and systems engineering: empirical observations, laws, and theories","author":"Endres","year":"2003"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1147\/sj.411.0004"},{"key":"ref46","volume-title":"Software test automation","author":"Fewster","year":"1999"},{"key":"ref47","volume-title":"Experiences of test automation: case studies of software test automation","author":"Graham","year":"2012"},{"key":"ref48","volume-title":"Succeeding with agile: software development using Scrum","author":"Cohn","year":"2010"},{"key":"ref49","article-title":"Opening keynote: move fast & don\u2019t break things","volume-title":"Google Test Automation Conference","author":"Mehta","year":"2014"},{"key":"ref50","volume-title":"Just say no to more end-to-end tests","author":"Wacker","year":"2025"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/mahc.1983.10102"},{"key":"ref52","volume-title":"Continuous integration","author":"Fowler","year":"2025"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2016.03.003"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/MS.2017.34"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1155\/2010\/620836"},{"key":"ref56","doi-asserted-by":"crossref","DOI":"10.5220\/0009766800270038","volume-title":"Software test automation maturity - a survey of the state of the practice","author":"Wang","year":"2020"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/IWAST.2012.6228988"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/3549206.3549321"},{"key":"ref59","article-title":"Test automation: reducing time to market","volume-title":"International Conference on Software Testing, Analysis & Review","author":"Dougherty","year":"2002"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICSTW.2014.34"},{"key":"ref61","volume-title":"Test driven development: by example","author":"Beck","year":"2022"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/APSEC.2011.44"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1145\/2851613.2851778"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/MS.2010.152"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2012.28"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/1159733.1159788"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1007\/s11219-011-9130-2"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1145\/3382494.3410687"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72781-8_3"},{"key":"ref70","first-page":"1583","article-title":"Test-Driven Development and LLM-based code generation","volume-title":"Proceedings of the 39th IEEE\/ACM International Conference on Automated Software Engineering. ACM","author":"Mathews","year":"2024"},{"key":"ref71","volume-title":"The complete guide for TDD with LLMs","author":"Chaves","year":"2025"},{"key":"ref72","volume-title":"Test driven development (TDD) of LLM \/ agent applications with PyTest","author":"Krawczyk","year":"2025"},{"key":"ref73","volume-title":"Automate building guardrails for Amazon Bedrock using test-driven development","author":"Patel","year":"2025"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/WPC.2001.921714"},{"key":"ref75","article-title":"Dapper, a large-scale distributed systems tracing infrastructure","author":"Sigelman","year":"2010","journal-title":"Google Inc., Tech. Rep."},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2023.111793"},{"key":"ref77","volume-title":"Mocks aren\u2019t stubs","author":"Fowler","year":"2025"},{"key":"ref78","volume-title":"Design Patterns: elements of reusable object-oriented software","author":"Gamma","year":"1994"},{"key":"ref79","first-page":"9459","article-title":"Retrieval-augmented generation for knowledge-intensive NLP tasks","volume":"33","author":"Lewis","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref80","article-title":"ISO - International Organization for Standardization","year":"2021","journal-title":"ISO\/IEC\/ IEEE 29119 Software Testing"},{"key":"ref81","volume-title":"Innovating at speed: BMW\u2019s generative AI solution for cloud incident analysis","author":"Wildgruber","year":"2025"}],"event":{"name":"2025 IEEE International Conference on Big Data (BigData)","location":"Macau, China","start":{"date-parts":[[2025,12,8]]},"end":{"date-parts":[[2025,12,11]]}},"container-title":["2025 IEEE International Conference on Big Data (BigData)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11400704\/11400712\/11401679.pdf?arnumber=11401679","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,20]],"date-time":"2026-03-20T19:55:15Z","timestamp":1774036515000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11401679\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,8]]},"references-count":81,"URL":"https:\/\/doi.org\/10.1109\/bigdata66926.2025.11401679","relation":{},"subject":[],"published":{"date-parts":[[2025,12,8]]}}}