{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T05:13:01Z","timestamp":1778130781477,"version":"3.51.4"},"reference-count":42,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T00:00:00Z","timestamp":1773705600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,3,17]],"date-time":"2026-03-17T00:00:00Z","timestamp":1773705600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,3,17]]},"DOI":"10.1109\/saner-c67878.2026.00057","type":"proceedings-article","created":{"date-parts":[[2026,5,6]],"date-time":"2026-05-06T19:39:18Z","timestamp":1778096358000},"page":"377-384","source":"Crossref","is-referenced-by-count":0,"title":["TAM-Eval: Evaluating LLMs for Automated Unit Test Maintenance"],"prefix":"10.1109","author":[{"given":"Elena","family":"Bruches","sequence":"first","affiliation":[{"name":"Siberian Neuronets LLC,Novosibirsk,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vadim","family":"Alperovich","sequence":"additional","affiliation":[{"name":"T-Technologies,Moscow,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dari","family":"Baturova","sequence":"additional","affiliation":[{"name":"Siberian Neuronets LLC,Novosibirsk,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Roman","family":"Derunets","sequence":"additional","affiliation":[{"name":"Siberian Neuronets LLC,Novosibirsk,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Daniil","family":"Grebenkin","sequence":"additional","affiliation":[{"name":"Siberian Neuronets LLC,Novosibirsk,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Georgy","family":"Mkrtchyan","sequence":"additional","affiliation":[{"name":"T-Technologies,Moscow,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Oleg","family":"Sedukhin","sequence":"additional","affiliation":[{"name":"Siberian Neuronets LLC,Novosibirsk,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mikhail","family":"Klementev","sequence":"additional","affiliation":[{"name":"Siberian Neuronets LLC,Novosibirsk,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ivan","family":"Bondarenko","sequence":"additional","affiliation":[{"name":"Novosibirsk State University,Novosibirsk,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nikolay","family":"Bushkov","sequence":"additional","affiliation":[{"name":"T-Technologies,Moscow,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stanislav","family":"Moiseev","sequence":"additional","affiliation":[{"name":"T-Technologies,Moscow,Russia"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1002\/9780470165171"},{"key":"ref2","volume-title":"How Much Does Software Testing Cost in 2025? - globalapptesting.com","year":"2025"},{"key":"ref3","volume-title":"A survey on large language models for code generation","author":"Jiang","year":"2024"},{"key":"ref4","doi-asserted-by":"crossref","DOI":"10.1145\/3691620.3695529","volume-title":"On the evaluation of large language models in unit test generation","author":"Yang","year":"2024"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1145\/3664646.3664765"},{"key":"ref6","volume-title":"An empirical study on the code refactoring capability of large language models","author":"Cordeiro","year":"2024"},{"key":"ref7","volume-title":"An empirical study on Ilm-based agents for automated bug fixing","author":"Meng","year":"2024"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/tse.1976.233818"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/1297846.1297902"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2025113.2025179"},{"key":"ref11","volume-title":"The prompt alchemist: Automated llm-tailored prompt optimization for test case generation","author":"Gao","year":"2025"},{"key":"ref12","volume-title":"Large language models cannot self-correct reasoning yet","author":"Huang","year":"2024"},{"key":"ref13","article-title":"Is self-repair a silver bullet for code generation?","volume-title":"International Conference on Learning Representations (ICLR)","author":"Olausson"},{"key":"ref14","article-title":"Chain-of-thought prompting elicits reasoning in large language models","volume-title":"Proceedings of the 36th International Conference on Neural Information Processing Systems, ser. NIPS \u201922","author":"Wei"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.52202\/075280-0517"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3660783"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/icse48619.2023.00194"},{"key":"ref18","volume-title":"Testart: Improving llm-based unit testing via co-evolution of automated generation and repair iteration","author":"Gu","year":"2025"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/3663529.3663801"},{"key":"ref20","volume-title":"Llm test generation via iterative hybrid program analysis","author":"Gu","year":"2025"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2023.3334955"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.52202\/079017-2601"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3663529.3663839"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3508398.3511495"},{"key":"ref25","volume-title":"Long code arena: a set of benchmarks for long-context code models","author":"Bogomolov","year":"2024"},{"key":"ref26","volume-title":"Swe-smith: Scaling data for software engineering agents","author":"Yang","year":"2025"},{"key":"ref27","author":"Zhang","year":"2024","journal-title":"Testbench: Evaluating class-level test case generation capability of large language models"},{"key":"ref28","volume-title":"Projecttest: A project-level 11 m unit test generation benchmark and impact of error fixing mechanisms","author":"Wang","year":"2025"},{"key":"ref29","article-title":"SWE-bench: Can language models resolve realworld github issues?","volume-title":"The Twelfth International Conference on Learning Representations","author":"Jimenez"},{"key":"ref30","volume-title":"Cpp-ut-bench: Can 11 ms write complex unit tests in c++?","author":"Bhargava","year":"2024"},{"key":"ref31","volume-title":"Clover: A test case generation benchmark with coverage, long-context, and verification","author":"Xu","year":"2025"},{"key":"ref32","doi-asserted-by":"crossref","first-page":"3547","DOI":"10.18653\/v1\/2025.findings-naacl.197","article-title":"TESTEVAL: Benchmarking large language models for test case generation","volume-title":"Findings of the Association for Computational Linguistics: NAACL 2025","author":"Wang","year":"2025"},{"key":"ref33","volume-title":"Prompting large language models to tackle the full software development lifecycle: A case study","author":"Li","year":"2024"},{"key":"ref34","volume-title":"Testgeneval: A real world unit test generation and test completion benchmark","author":"Jain","year":"2025"},{"key":"ref35","volume-title":"Mera code: A unified framework for evaluating code generation across tasks","author":"Chervyakov","year":"2025"},{"key":"ref36","volume-title":"DevStral: Introducing the best open-source model for coding agents","author":"Mistral","year":"2025"},{"key":"ref37","volume-title":"Qwen3 technical report","author":"Yang","year":"2025"},{"key":"ref38","volume-title":"Deepseek-v3 technical report","year":"2024"},{"key":"ref39","volume-title":"Gpt-oss-120b & gpt-oss-20b model card","year":"2025"},{"key":"ref40","author":"Comanici","year":"2025","journal-title":"Gemini 2.5: Pushing the frontier with advanced reasoning, multimodality, long context, and next generation agentic capabilities"},{"key":"ref41","volume-title":"Introducing GPT-5 in the API","year":"2025"},{"key":"ref42","article-title":"RM -RF: Reward model for run-free unit test evaluation","volume-title":"Proceedings of the 33rd IEEE International Conference on Software Analysis, Evolution and Reengineering (SANER)","author":"Bruches"}],"event":{"name":"2026 IEEE International Conference on Software Analysis, Evolution and Reengineering - Companion (SANER-C)","location":"Limassol, Cyprus","start":{"date-parts":[[2026,3,17]]},"end":{"date-parts":[[2026,3,20]]}},"container-title":["2026 IEEE International Conference on Software Analysis, Evolution and Reengineering - Companion (SANER-C)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11500139\/11499992\/11500203.pdf?arnumber=11500203","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T04:28:10Z","timestamp":1778128090000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11500203\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,3,17]]},"references-count":42,"URL":"https:\/\/doi.org\/10.1109\/saner-c67878.2026.00057","relation":{},"subject":[],"published":{"date-parts":[[2026,3,17]]}}}