{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T12:39:50Z","timestamp":1768999190146,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,17]]},"DOI":"10.1145\/3756681.3757000","type":"proceedings-article","created":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T08:30:04Z","timestamp":1766565004000},"page":"959-968","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Assertion Messages with Large Language Models (LLMs) for Code"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-7939-0707","authenticated-orcid":false,"given":"Ahmed","family":"Aljohani","sequence":"first","affiliation":[{"name":"University of North Texas, Denton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5867-8699","authenticated-orcid":false,"given":"Anamul Haque","family":"Mollah","sequence":"additional","affiliation":[{"name":"University of North Texas, Denton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3298-859X","authenticated-orcid":false,"given":"Hyunsook","family":"Do","sequence":"additional","affiliation":[{"name":"University of North Texas, Denton, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,12,24]]},"reference":[{"key":"e_1_3_3_2_2_2","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639183"},{"key":"e_1_3_3_2_3_2","unstructured":"Mistral AI. 2024. Introducing Codestral: A State-of-the-Art Code Language Model. https:\/\/mistral.ai\/news\/codestral-2501. Accessed: 2025-02-25."},{"key":"e_1_3_3_2_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3463274.3463335"},{"key":"e_1_3_3_2_5_2","doi-asserted-by":"publisher","DOI":"10.1145\/3605098.3636058"},{"key":"e_1_3_3_2_6_2","first-page":"65","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65\u201372."},{"key":"e_1_3_3_2_7_2","unstructured":"Mohammad Bavarian Heewoo Jun Nikolas Tezak John Schulman Christine McLeavey Jerry Tworek and Mark Chen. 2022. Efficient training of language models to fill in the middle. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2207.14255 (2022)."},{"key":"e_1_3_3_2_8_2","doi-asserted-by":"crossref","unstructured":"Gabriele Bavota Abdallah Qusef Rocco Oliveto Andrea De\u00a0Lucia and Dave Binkley. 2015. Are test smells really harmful? an empirical study. Empirical Software Engineering 20 (2015) 1052\u20131094.","DOI":"10.1007\/s10664-014-9313-0"},{"key":"e_1_3_3_2_9_2","unstructured":"Amirhossein Deljouyi Roham Koohestani Maliheh Izadi and Andy Zaidman. 2024. Leveraging large language models for enhancing the understandability of generated unit tests. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.11710 (2024)."},{"key":"e_1_3_3_2_10_2","doi-asserted-by":"publisher","DOI":"10.1145\/2025113.2025179"},{"key":"e_1_3_3_2_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3524610.3527909"},{"key":"e_1_3_3_2_12_2","doi-asserted-by":"crossref","unstructured":"Yibo He Jiaming Huang Hao Yu and Tao Xie. 2024. An empirical study on focal methods in deep-learning-based approaches for assertion generation. Proceedings of the ACM on Software Engineering 1 FSE (2024) 1750\u20131771.","DOI":"10.1145\/3660785"},{"key":"e_1_3_3_2_13_2","unstructured":"Binyuan Hui Jian Yang Zeyu Cui Jiaxi Yang Dayiheng Liu Lei Zhang Tianyu Liu Jiajun Zhang Bowen Yu Kai Dang et\u00a0al. 2024. Qwen2. 5-Coder Technical Report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2409.12186 (2024)."},{"key":"e_1_3_3_2_14_2","volume-title":"Unit testing principles, practices, and patterns","author":"Khorikov Vladimir","year":"2020","unstructured":"Vladimir Khorikov. 2020. Unit testing principles, practices, and patterns. Simon and Schuster."},{"key":"e_1_3_3_2_15_2","unstructured":"Raymond Li Loubna\u00a0Ben Allal Yangtian Zi Niklas Muennighoff Denis Kocetkov Chenghao Mou Marc Marone Christopher Akiki Jia Li Jenny Chim Qian Liu Evgenii Zheltonozhskii Terry\u00a0Yue Zhuo Thomas Wang Olivier Dehaene Mishig Davaadorj Joel Lamy-Poirier Jo\u00e3o Monteiro Oleh Shliazhko Nicolas Gontier Nicholas Meade Armel Zebaze Ming-Ho Yee Logesh\u00a0Kumar Umapathi Jian Zhu Benjamin Lipkin Muhtasham Oblokulov Zhiruo Wang Rudra Murthy Jason Stillerman Siva\u00a0Sankalp Patel Dmitry Abulkhanov Marco Zocca Manan Dey Zhihan Zhang Nour Fahmy Urvashi Bhattacharyya Wenhao Yu Swayam Singh Sasha Luccioni Paulo Villegas Maxim Kunakov Fedor Zhdanov Manuel Romero Tony Lee Nadav Timor Jennifer Ding Claire Schlesinger Hailey Schoelkopf Jan Ebert Tri Dao Mayank Mishra Alex Gu Jennifer Robinson Carolyn\u00a0Jane Anderson Brendan Dolan-Gavitt Danish Contractor Siva Reddy Daniel Fried Dzmitry Bahdanau Yacine Jernite Carlos\u00a0Mu\u00f1oz Ferrandis Sean Hughes Thomas Wolf Arjun Guha Leandro von Werra and Harm de Vries. 2023. StarCoder: may the source be with you! (2023). arxiv:https:\/\/arXiv.org\/abs\/2305.06161\u00a0[cs.CL]"},{"key":"e_1_3_3_2_16_2","first-page":"74","volume-title":"Text summarization branches out","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381."},{"key":"e_1_3_3_2_17_2","unstructured":"Jiawei Liu Chunqiu\u00a0Steven Xia Yuyao Wang and Lingming Zhang. 2023. Is your code generated by chatgpt really correct? rigorous evaluation of large language models for code generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.01210 (2023)."},{"key":"e_1_3_3_2_18_2","unstructured":"Shuai Lu Daya Guo Shuo Ren Junjie Huang Alexey Svyatkovskiy Ambrosio Blanco Colin Clement Dawn Drain Daxin Jiang Duyu Tang et\u00a0al. 2021. Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2102.04664 (2021)."},{"key":"e_1_3_3_2_19_2","unstructured":"Wendk\u00fbuni\u00a0C Ou\u00e9draogo Yinghua Li Kader Kabor\u00e9 Xunzhu Tang Anil Koyuncu Jacques Klein David Lo and Tegawend\u00e9\u00a0F Bissyand\u00e9. 2024. Test smells in LLM-Generated Unit Tests. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.10628 (2024)."},{"key":"e_1_3_3_2_20_2","doi-asserted-by":"crossref","unstructured":"Annibale Panichella Sebastiano Panichella Gordon Fraser Anand\u00a0Ashok Sawant and Vincent\u00a0J Hellendoorn. 2022. Test smells 20 years later: detectability validity and reliability. Empirical Software Engineering 27 7 (2022) 170.","DOI":"10.1007\/s10664-022-10207-5"},{"key":"e_1_3_3_2_21_2","first-page":"311","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318."},{"key":"e_1_3_3_2_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME58944.2024.00055"},{"key":"e_1_3_3_2_23_2","doi-asserted-by":"publisher","DOI":"10.1145\/3643795.3648395"},{"key":"e_1_3_3_2_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME.2014.31"},{"key":"e_1_3_3_2_25_2","doi-asserted-by":"crossref","unstructured":"Severin Primbs Benedikt Fein and Gordon Fraser. 2025. AsserT5: Test Assertion Generation Using a Fine-Tuned Code Language Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2502.02708 (2025).","DOI":"10.1109\/AST66626.2025.00008"},{"key":"e_1_3_3_2_26_2","unstructured":"Baptiste Roziere Jonas Gehring Fabian Gloeckle Sten Sootla Itai Gat Xiaoqing\u00a0Ellen Tan Yossi Adi Jingyu Liu Romain Sauvestre Tal Remez et\u00a0al. 2023. Code llama: Open foundation models for code. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2308.12950 (2023)."},{"key":"e_1_3_3_2_27_2","doi-asserted-by":"publisher","DOI":"10.1145\/3661167.3661216"},{"key":"e_1_3_3_2_28_2","doi-asserted-by":"crossref","unstructured":"Chia-Yi Su and Collin McMillan. 2024. Distilled GPT for source code summarization. Automated Software Engineering 31 1 (2024) 22.","DOI":"10.1007\/s10515-024-00421-4"},{"key":"e_1_3_3_2_29_2","unstructured":"Weisong Sun Chunrong Fang Yudu You Yun Miao Yi Liu Yuekang Li Gelei Deng Shenghan Huang Yuchen Chen Quanjun Zhang et\u00a0al. 2023. Automatic code summarization via chatgpt: How far are we? arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2305.12865 (2023)."},{"key":"e_1_3_3_2_30_2","unstructured":"Weisong Sun Yun Miao Yuekang Li Hongyu Zhang Chunrong Fang Yi Liu Gelei Deng Yang Liu and Zhenyu Chen. 2024. Source code summarization in the era of large language models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.07959 (2024)."},{"key":"e_1_3_3_2_31_2","unstructured":"Mahan Tafreshipour Aaron Imani Eric Huang Eduardo Almeida Thomas Zimmermann and Iftekhar Ahmed. 2024. Prompting in the Wild: An Empirical Study of Prompt Evolution in Software Repositories. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.17298 (2024)."},{"key":"e_1_3_3_2_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/NLBSE59153.2023.00015"},{"key":"e_1_3_3_2_33_2","unstructured":"Michele Tufano Dawn Drain Alexey Svyatkovskiy Shao\u00a0Kun Deng and Neel Sundaresan. 2020. Unit test case generation with transformers and focal context. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2009.05617 (2020)."},{"key":"e_1_3_3_2_34_2","doi-asserted-by":"crossref","unstructured":"Junjie Wang Yuchao Huang Chunyang Chen Zhe Liu Song Wang and Qing Wang. 2024. Software testing with large language models: Survey landscape and vision. IEEE Transactions on Software Engineering (2024).","DOI":"10.1109\/TSE.2024.3368208"},{"key":"e_1_3_3_2_35_2","doi-asserted-by":"crossref","unstructured":"Yaqing Wang Quanming Yao James\u00a0T Kwok and Lionel\u00a0M Ni. 2020. Generalizing from a few examples: A survey on few-shot learning. ACM computing surveys (csur) 53 3 (2020) 1\u201334.","DOI":"10.1145\/3386252"},{"key":"e_1_3_3_2_36_2","doi-asserted-by":"publisher","DOI":"10.1145\/3377811.3380429"},{"key":"e_1_3_3_2_37_2","unstructured":"Jason Wei Xuezhi Wang Dale Schuurmans Maarten Bosma Fei Xia Ed Chi Quoc\u00a0V Le Denny Zhou et\u00a0al. 2022. Chain-of-thought prompting elicits reasoning in large language models. Advances in neural information processing systems 35 (2022) 24824\u201324837."},{"key":"e_1_3_3_2_38_2","doi-asserted-by":"crossref","unstructured":"Lucas Zamprogno Braxton Hall Reid Holmes and Joanne\u00a0M Atlee. 2022. Dynamic human-in-the-loop assertion generation. IEEE Transactions on Software Engineering 49 4 (2022) 2337\u20132351.","DOI":"10.1109\/TSE.2022.3217544"},{"key":"e_1_3_3_2_39_2","doi-asserted-by":"publisher","DOI":"10.1145\/3377811.3380383"},{"key":"e_1_3_3_2_40_2","doi-asserted-by":"crossref","unstructured":"Quanjun Zhang Weifeng Sun Chunrong Fang Bowen Yu Hongyan Li Meng Yan Jianyi Zhou and Zhenyu Chen. 2025. Exploring automated assertion generation via large language models. ACM Transactions on Software Engineering and Methodology 34 3 (2025) 1\u201325.","DOI":"10.1145\/3699598"},{"key":"e_1_3_3_2_41_2","unstructured":"Tianyi Zhang Varsha Kishore Felix Wu Kilian\u00a0Q Weinberger and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.09675 (2019)."}],"event":{"name":"EASE '25: Evaluation and Assessment in Software Engineering","location":"Istanbul Turkiye","acronym":"EASE '25"},"container-title":["Proceedings of the 29th International Conference on Evaluation and Assessment in Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3756681.3757000","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T08:43:44Z","timestamp":1766565824000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3756681.3757000"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,17]]},"references-count":40,"alternative-id":["10.1145\/3756681.3757000","10.1145\/3756681"],"URL":"https:\/\/doi.org\/10.1145\/3756681.3757000","relation":{},"subject":[],"published":{"date-parts":[[2025,6,17]]},"assertion":[{"value":"2025-12-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}