{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T23:24:56Z","timestamp":1780356296320,"version":"3.54.1"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,6,17]]},"DOI":"10.1145\/3756681.3756993","type":"proceedings-article","created":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T08:30:04Z","timestamp":1766565004000},"page":"969-975","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Benchmarking LLM for Code Smells Detection: OpenAI GPT-4.0 vs DeepSeek-V3"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8291-2211","authenticated-orcid":false,"given":"Ahmed R.","family":"Sadik","sequence":"first","affiliation":[{"name":"Honda Research Institute Europe, Offenbach am Main, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-6173-0667","authenticated-orcid":false,"given":"Siddhata","family":"Govind","sequence":"additional","affiliation":[{"name":"Honda Research Institute Europe, Offenbach am Main, Germany"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2025,12,24]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Josh Achiam Steven Adler Sandhini Agarwal Lama Ahmad Ilge Akkaya Florencia\u00a0Leoni Aleman Diogo Almeida Janko Altenschmidt Sam Altman Shyamal Anadkat et\u00a0al. 2023. Gpt-4 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.08774 (2023)."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.5753\/sbes.2024.3561"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1145\/3661167.3661225"},{"key":"e_1_3_3_1_5_2","unstructured":"Evgenii Evstafev. 2025. Token-Hungry Yet Precise: DeepSeek R1 Highlights the Need for Multi-Step Reasoning Over Speed in MATH. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.18576 (2025)."},{"key":"e_1_3_3_1_6_2","unstructured":"Jiri Gesi Siqi Liu Jiawei Li Iftekhar Ahmed Nachiappan Nagappan David Lo Eduardo\u00a0Santana de Almeida Pavneet\u00a0Singh Kochhar and Lingfeng Bao. 2022. Code smells in machine learning systems. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2203.00803 (2022)."},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1145\/3190645.3190697"},{"key":"e_1_3_3_1_8_2","unstructured":"Aixin Liu Bei Feng Bing Xue Bingxuan Wang Bochao Wu Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan et\u00a0al. 2024. Deepseek-v3 technical report. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2412.19437 (2024)."},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"crossref","unstructured":"Keila Lucas Rohit Gheyi Elvys Soares M\u00e1rcio Ribeiro and Ivan Machado. 2024. Evaluating large language models in detecting test smells. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2407.19261 (2024).","DOI":"10.5753\/sbes.2024.3642"},{"key":"e_1_3_3_1_10_2","first-page":"6","volume-title":"Proceedings of 7th International Workshop on Object-Oriented Reengineering (WOOR)","author":"Marticorena Ra\u00fal","year":"2006","unstructured":"Ra\u00fal Marticorena, Carlos L\u00f3pez, and Yania Crespo. 2006. Extending a taxonomy of bad code smells with metrics. In Proceedings of 7th International Workshop on Object-Oriented Reengineering (WOOR). Citeseer, 6."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"crossref","unstructured":"Fnu Neha and Deepshikha Bhati. 2025. A Survey of DeepSeek Models. Authorea Preprints (2025).","DOI":"10.36227\/techrxiv.173896582.25938392\/v1"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Thanis Paiva Amanda Damasceno Eduardo Figueiredo and Cl\u00e1udio Sant\u2019Anna. 2017. On the evaluation of code smells and detection tools. Journal of Software Engineering Research and Development 5 (2017) 1\u201328.","DOI":"10.1186\/s40411-017-0041-1"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICIT52682.2021.9491688"},{"key":"e_1_3_3_1_14_2","unstructured":"Refactoring.Guru. [n. d.]. Code Smells. https:\/\/refactoring.guru\/refactoring\/smells. Accessed: 2025-02-07."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"publisher","unstructured":"Ahmed\u00a0R. Sadik. 2025. Smelly Code Dataset - Python\/Java\/JavaScript\/C++. 10.5281\/zenodo.14989674","DOI":"10.5281\/zenodo.14989674"},{"key":"e_1_3_3_1_16_2","unstructured":"Ahmed\u00a0R. Sadik. 2025. Smelly Code Dataset - Python\/Java\/JavaScript\/C++. https:\/\/github.com\/HRI-EU\/SmellyCodeDataset Accessed: 2025-02-07."},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"crossref","unstructured":"Ahmed\u00a0R Sadik Sebastian Brulin and Markus Olhofer. 2023. Coding by design: Gpt-4 empowers agile model driven development. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.04304 (2023).","DOI":"10.5220\/0012356100003645"},{"key":"e_1_3_3_1_18_2","unstructured":"Ahmed\u00a0R Sadik Antonello Ceravola Frank Joublin and Jibesh Patra. 2023. Analysis of chatgpt on source code. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2306.00597 (2023)."},{"key":"e_1_3_3_1_19_2","unstructured":"Ahmed\u00a0R Sadik and Siddhata Govind. 2025. Benchmarking LLM for Code Smells Detection: OpenAI GPT-4.0 vs DeepSeek-V3. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2504.16027 (2025)."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1145\/3319008.3319033"},{"key":"e_1_3_3_1_21_2","doi-asserted-by":"crossref","unstructured":"Stuti Tandon Vijay Kumar and VB Singh. 2024. Study of Code Smells: A Review and Research Agenda. International Journal of Mathematical Engineering & Management Sciences 9 3 (2024).","DOI":"10.33889\/IJMEMS.2024.9.3.025"},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Yiyi Tang Ziyan Xiao Xue Li Qiwen Fang Qingpeng Zhang Daniel Yee\u00a0Tak Fong Francisco Tsz\u00a0Tsun Lai Celine Sze\u00a0Ling Chui Esther Wai\u00a0Yin Chan Ian Chi\u00a0Kei Wong et\u00a0al. 2024. Large language model in medical information extraction from titles and abstracts with prompt engineering strategies: A comparative study of gpt-3.5 and gpt-4. medRxiv (2024) 2024\u201303.","DOI":"10.1101\/2024.03.20.24304572"},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Bartosz Walter and Tarek Alkhaeir. 2016. The relationship between design patterns and code smells: An exploratory study. Information and Software Technology 74 (2016) 127\u2013142.","DOI":"10.1016\/j.infsof.2016.02.003"},{"key":"e_1_3_3_1_24_2","unstructured":"Muhammad Waseem Teerath Das Aakash Ahmad Peng Liang Mahdi Fehmideh and Tommi Mikkonen. 2023. ChatGPT as a software development bot: a project-based study. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2310.13648 (2023)."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"publisher","DOI":"10.1145\/3691620.3695508"}],"event":{"name":"EASE '25: Evaluation and Assessment in Software Engineering","location":"Istanbul Turkiye","acronym":"EASE '25"},"container-title":["Proceedings of the 29th International Conference on Evaluation and Assessment in Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3756681.3756993","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T08:38:56Z","timestamp":1766565536000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3756681.3756993"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6,17]]},"references-count":24,"alternative-id":["10.1145\/3756681.3756993","10.1145\/3756681"],"URL":"https:\/\/doi.org\/10.1145\/3756681.3756993","relation":{},"subject":[],"published":{"date-parts":[[2025,6,17]]},"assertion":[{"value":"2025-12-24","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}