{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,25]],"date-time":"2026-02-25T04:35:46Z","timestamp":1771994146570,"version":"3.50.1"},"reference-count":50,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"8","license":[{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,8,1]],"date-time":"2025-08-01T00:00:00Z","timestamp":1754006400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IIEEE Trans. Software Eng."],"published-print":{"date-parts":[[2025,8]]},"DOI":"10.1109\/tse.2025.3586082","type":"journal-article","created":{"date-parts":[[2025,7,4]],"date-time":"2025-07-04T13:50:48Z","timestamp":1751637048000},"page":"2329-2345","source":"Crossref","is-referenced-by-count":5,"title":["On the Effectiveness of LLM-as-a-Judge for Code Generation and Summarization"],"prefix":"10.1109","volume":"51","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-0951-4769","authenticated-orcid":false,"given":"Giuseppe","family":"Crupi","sequence":"first","affiliation":[{"name":"SEART @ Software Institute, Universit&#x00E0; della Svizzera italiana, Lugano, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7017-3066","authenticated-orcid":false,"given":"Rosalia","family":"Tufano","sequence":"additional","affiliation":[{"name":"SEART @ Software Institute, Universit&#x00E0; della Svizzera italiana, Lugano, Switzerland"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4829-1017","authenticated-orcid":false,"given":"Alejandro","family":"Velasco","sequence":"additional","affiliation":[{"name":"W&#x0026;M, Williamsburg, VA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7965-7712","authenticated-orcid":false,"given":"Antonio","family":"Mastropaolo","sequence":"additional","affiliation":[{"name":"W&#x0026;M, Williamsburg, VA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5626-7586","authenticated-orcid":false,"given":"Denys","family":"Poshyvanyk","sequence":"additional","affiliation":[{"name":"W&#x0026;M, Williamsburg, VA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2216-3148","authenticated-orcid":false,"given":"Gabriele","family":"Bavota","sequence":"additional","affiliation":[{"name":"SEART @ Software Institute, Universit&#x00E0; della Svizzera italiana, Lugano, Switzerland"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Replication package","year":"2025"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3551349.3559555"},{"key":"ref3","article-title":"Open AI ChatGPT","author":"AI","year":"2025"},{"key":"ref4","first-page":"65","article-title":"METEOR: An automatic metric for MT evaluation with improved correlation with human judgments","volume-title":"Proc. ACL Workshop Intrinsic Extrinsic Eval. Meas. Mach. Transl. Summarization","author":"Banerjee","year":"2005"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2021.3128234"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1177\/001316446002000104"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639219"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3551349.3556903"},{"key":"ref9","article-title":"Evaluating large language models trained on code","author":"Chen","year":"2021"},{"key":"ref10","article-title":"Code Llama: Open foundation models for code","author":"Rozi\u00e8re","year":"2024"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.4324\/9780203803233"},{"key":"ref12","first-page":"25","article-title":"An extensible, regular-expression-based tool for multi-language mutant generation","volume-title":"Proc. 40th Int. Conf. Softw. Eng.: Companion, (ICSE)","author":"Groce","year":"2018"},{"key":"ref13","article-title":"DeepSeek-Coder: When the large language model meets programming \u2013 the rise of code intelligence","author":"Guo","year":"2024"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3623306"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/nnnnnnn.nnnnnnn"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3379597.3387449"},{"key":"ref17","first-page":"1","article-title":"Measuring coding challenge competence with apps","volume-title":"Proc. NeurIPS","author":"Hendrycks","year":"2021"},{"key":"ref18","article-title":"On the limitations of fine-tuned judge models for LLM evaluation","author":"Huang","year":"2024"},{"key":"ref19","article-title":"Inference endpoints","year":"2025"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/3611643.3613892"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1145\/3660769"},{"key":"ref22","volume-title":"Thinking, Fast and Slow","author":"Kahneman","year":"2011"},{"key":"ref23","first-page":"22199","article-title":"Large language models are zero-shot reasoners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"35","author":"Kojima","year":"2022"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1145\/3641554.3701791"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1111\/j.1468-2958.2004.tb00738.x"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3661167.3661281"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1145\/3387904.3389268"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549081"},{"key":"ref29","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin","year":"2004","journal-title":"Text Summarization Branches Out"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3639174"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1016\/j.simpa.2022.100429"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3468264.3468588"},{"key":"ref34","article-title":"Is ChatGPT the ultimate programming assistant\u2013How far is it?","author":"Tian","year":"2023"},{"key":"ref35","first-page":"20032","article-title":"CodeJudge: Evaluating code generation with large language models","volume-title":"Proc. Conf. Empirical Methods Natural Lang. Process.","author":"Tong","year":"2024"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510621"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1145\/3485275"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ISSREW60843.2023.00040"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/3736407"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/SANER.2019.8668043"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1145\/2970276.2970326"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.2307\/3001968"},{"key":"ref43","article-title":"Lizard","author":"Yin","year":"2025"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1111\/j.2517-6161.1995.tb02031.x"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3623316"},{"key":"ref46","first-page":"46\u2009595","article-title":"Judging LLM-as-a-judge with MT-bench and chatbot arena","volume-title":"Adv. Neural Informat. Process. Syst.","volume":"36","author":"Zheng","year":"2023"},{"key":"ref47","article-title":"Judging LLM-as-a-judge with MT-bench and chatbot arena","author":"Zheng","year":"2023"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599790"},{"key":"ref49","first-page":"2232","article-title":"ICE-score: Instructing large language models to evaluate code","volume-title":"Proc. Findings Assoc. Comput. Linguistics (EACL)","author":"Zhuo","year":"2024"},{"key":"ref50","article-title":"BigcodeBENCH: Benchmarking code generation with diverse function calls and complex instructions","author":"Zhuo","year":"2024"}],"container-title":["IEEE Transactions on Software Engineering"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/32\/11126986\/11071936.pdf?arnumber=11071936","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,18]],"date-time":"2025-08-18T19:43:30Z","timestamp":1755546210000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11071936\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8]]},"references-count":50,"journal-issue":{"issue":"8"},"URL":"https:\/\/doi.org\/10.1109\/tse.2025.3586082","relation":{},"ISSN":["0098-5589","1939-3520","2326-3881"],"issn-type":[{"value":"0098-5589","type":"print"},{"value":"1939-3520","type":"electronic"},{"value":"2326-3881","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8]]}}}