{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T03:48:33Z","timestamp":1781927313831,"version":"3.54.5"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2021,8,18]],"date-time":"2021-08-18T00:00:00Z","timestamp":1629244800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2021,8,20]]},"DOI":"10.1145\/3468264.3468588","type":"proceedings-article","created":{"date-parts":[[2021,8,19]],"date-time":"2021-08-19T01:40:37Z","timestamp":1629337237000},"page":"1105-1116","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":87,"title":["Reassessing automatic evaluation metrics for code summarization tasks"],"prefix":"10.1145","author":[{"given":"Devjeet","family":"Roy","sequence":"first","affiliation":[{"name":"Washington State University, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sarah","family":"Fakhoury","sequence":"additional","affiliation":[{"name":"Washington State University, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Venera","family":"Arnaoudova","sequence":"additional","affiliation":[{"name":"Washington State University, USA"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2021,8,18]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2020.110800"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"crossref","unstructured":"Wasi Ahmad Saikat Chakraborty Baishakhi Ray and Kai-Wei Chang. 2020. A Transformer-based Approach for Source Code Summarization. In ACL (short).  Wasi Ahmad Saikat Chakraborty Baishakhi Ray and Kai-Wei Chang. 2020. A Transformer-based Approach for Source Code Summarization. In ACL (short).","DOI":"10.18653\/v1\/2020.acl-main.449"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASWEC.2018.00011"},{"key":"e_1_3_2_1_4_1","volume-title":"International Conference on Machine Learning. 2091\u20132100","author":"Allamanis Miltiadis","year":"2016","unstructured":"Miltiadis Allamanis , Hao Peng , and Charles Sutton . 2016 . A convolutional attention network for extreme summarization of source code . In International Conference on Machine Learning. 2091\u20132100 . Miltiadis Allamanis, Hao Peng, and Charles Sutton. 2016. A convolutional attention network for extreme summarization of source code. In International Conference on Machine Learning. 2091\u20132100."},{"key":"e_1_3_2_1_5_1","unstructured":"Uri Alon Shaked Brody Omer Levy and Eran Yahav. 2018. code2seq: Generating sequences from structured representations of code. arXiv preprint arXiv:1808.01400.  Uri Alon Shaked Brody Omer Levy and Eran Yahav. 2018. code2seq: Generating sequences from structured representations of code. arXiv preprint arXiv:1808.01400."},{"key":"e_1_3_2_1_6_1","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65\u201372","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie . 2005 . METEOR: An automatic metric for MT evaluation with improved correlation with human judgments . In Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65\u201372 . Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65\u201372."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3346"},{"key":"e_1_3_2_1_8_1","volume-title":"Neural Comment Generation for Source Code with Auxiliary Code Classification Task. In 2019 26th Asia-Pacific Software Engineering Conference (APSEC). 522\u2013529","author":"Chen Minghao","year":"2019","unstructured":"Minghao Chen and Xiaojun Wan . 2019 . Neural Comment Generation for Source Code with Auxiliary Code Classification Task. In 2019 26th Asia-Pacific Software Engineering Conference (APSEC). 522\u2013529 . Minghao Chen and Xiaojun Wan. 2019. Neural Comment Generation for Source Code with Auxiliary Code Classification Task. In 2019 26th Asia-Pacific Software Engineering Conference (APSEC). 522\u2013529."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3238147.3240471"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigComp48618.2020.00011"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P18-1128"},{"key":"e_1_3_2_1_12_1","volume-title":"Structured Neural Summarization. In International Conference on Learning Representations.","author":"Fernandes Patrick","year":"2018","unstructured":"Patrick Fernandes , Miltiadis Allamanis , and Marc Brockschmidt . 2018 . Structured Neural Summarization. In International Conference on Learning Representations. Patrick Fernandes, Miltiadis Allamanis, and Marc Brockschmidt. 2018. Structured Neural Summarization. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/N15-1124"},{"key":"e_1_3_2_1_14_1","volume-title":"Proceedings of the 7th Linguistic Annotation Workshop and Interoperability with Discourse. 33\u201341","author":"Graham Yvette","year":"2013","unstructured":"Yvette Graham , Timothy Baldwin , Alistair Moffat , and Justin Zobel . 2013 . Continuous measurement scales in human evaluation of machine translation . In Proceedings of the 7th Linguistic Annotation Workshop and Interoperability with Discourse. 33\u201341 . Yvette Graham, Timothy Baldwin, Alistair Moffat, and Justin Zobel. 2013. Continuous measurement scales in human evaluation of machine translation. In Proceedings of the 7th Linguistic Annotation Workshop and Interoperability with Discourse. 33\u201341."},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/W14-3333"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3324884.3416546"},{"key":"e_1_3_2_1_17_1","first-page":"258","article-title":"Automatic comment generation using a neural translation model","volume":"55","author":"Haije Tjalling","year":"2016","unstructured":"Tjalling Haije , Bachelor Opleiding Kunstmatige Intelligentie , E Gavves , and H Heuer . 2016 . Automatic comment generation using a neural translation model . Inf. Softw. Technol. , 55 , 3 (2016), 258 \u2013 268 . Tjalling Haije, Bachelor Opleiding Kunstmatige Intelligentie, E Gavves, and H Heuer. 2016. Automatic comment generation using a neural translation model. Inf. Softw. Technol., 55, 3 (2016), 258\u2013268.","journal-title":"Inf. Softw. Technol."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3379597.3387449"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3196321.3196334"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/s10664-019-09730-9"},{"key":"e_1_3_2_1_21_1","volume-title":"Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence (IJCAI). 19","author":"Hu Xing","year":"2018","unstructured":"Xing Hu , Ge Li , Xin Xia , David Lo , Shuai Lu , and Zhi Jin . 2018 . Summarizing source code with transferred api knowledge.(2018) . In Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence (IJCAI). 19 , 2269\u20132275. Xing Hu, Ge Li, Xin Xia, David Lo, Shuai Lu, and Zhi Jin. 2018. Summarizing source code with transferred api knowledge.(2018). In Proceedings of the Twenty-Seventh International Joint Conference on Artificial Intelligence (IJCAI). 19, 2269\u20132275."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.infsof.2020.106373"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P16-1195"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3387904.3389268"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00087"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"crossref","unstructured":"Alexander LeClair and Collin McMillan. 2019. Recommendations for datasets for source code summarization. arXiv preprint arXiv:1904.02660.  Alexander LeClair and Collin McMillan. 2019. Recommendations for datasets for source code summarization. arXiv preprint arXiv:1904.02660.","DOI":"10.18653\/v1\/N19-1394"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3417926"},{"key":"e_1_3_2_1_28_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence. 32","author":"Liang Yuding","year":"2018","unstructured":"Yuding Liang and Kenny Zhu . 2018 . Automatic generation of text descriptive comments for code blocks . In Proceedings of the AAAI Conference on Artificial Intelligence. 32 . Yuding Liang and Kenny Zhu. 2018. Automatic generation of text descriptive comments for code blocks. In Proceedings of the AAAI Conference on Artificial Intelligence. 32."},{"key":"e_1_3_2_1_29_1","first-page":"44","article-title":"A Technique for the Measurement of Attitudes","volume":"140","author":"Likert R.","year":"1932","unstructured":"R. Likert . 1932 . A Technique for the Measurement of Attitudes . Archives of Psychology , 140 (1932), 44 \u2013 53 . R. Likert. 1932. A Technique for the Measurement of Attitudes. Archives of Psychology, 140 (1932), 44\u201353.","journal-title":"Archives of Psychology"},{"key":"e_1_3_2_1_30_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin . 2004 . Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381. Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74\u201381."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3361242.3362774"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME46990.2020.00114"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W19-5302"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"crossref","unstructured":"Nitika Mathur Tim Baldwin and Trevor Cohn. 2020. Tangled up in BLEU: Reevaluating the Evaluation of Automatic Machine Translation Evaluation Metrics. arXiv preprint arXiv:2006.06264.  Nitika Mathur Tim Baldwin and Trevor Cohn. 2020. Tangled up in BLEU: Reevaluating the Evaluation of Automatic Machine Translation Evaluation Metrics. arXiv preprint arXiv:2006.06264.","DOI":"10.18653\/v1\/2020.acl-main.448"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3283812.3283822"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/2597008.2597149"},{"key":"e_1_3_2_1_37_1","volume-title":"International Conference on Program Comprehension (ICPC). 230\u2013232","author":"Moreno L.","unstructured":"L. Moreno , A. Marcus , L. Pollock , and K. Vijay-Shanker . 2013. JSummarizer: An automatic generator of natural language summaries for Java classes . In International Conference on Program Comprehension (ICPC). 230\u2013232 . L. Moreno, A. Marcus, L. Pollock, and K. Vijay-Shanker. 2013. JSummarizer: An automatic generator of natural language summaries for Java classes. In International Conference on Program Comprehension (ICPC). 230\u2013232."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11390-016-1671-1"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2015.36"},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni , Salim Roukos , Todd Ward , and Wei-Jing Zhu . 2002 . BLEU: a method for automatic evaluation of machine translation . In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318 . Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. BLEU: a method for automatic evaluation of machine translation. In Proceedings of the 40th annual meeting of the Association for Computational Linguistics. 311\u2013318."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W15-3049"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"crossref","unstructured":"Matt Post. 2018. A call for clarity in reporting BLEU scores. arXiv preprint arXiv:1804.08771.  Matt Post. 2018. A call for clarity in reporting BLEU scores. arXiv preprint arXiv:1804.08771.","DOI":"10.18653\/v1\/W18-6319"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1162\/coli_a_00322"},{"key":"e_1_3_2_1_44_1","volume-title":"Proceedings of the ACL workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 57\u201364","author":"Riezler Stefan","year":"2005","unstructured":"Stefan Riezler and John T Maxwell III. 2005 . On some pitfalls in automatic evaluation and significance testing for MT . In Proceedings of the ACL workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 57\u201364 . Stefan Riezler and John T Maxwell III. 2005. On some pitfalls in automatic evaluation and significance testing for MT. In Proceedings of the ACL workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 57\u201364."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"crossref","unstructured":"Peter C. Rigby Daniel M German Laura Cowen and Margaret-Anne Storey. 2014. Peer Review on Open Source Software Projects: Parameters Statistical Models and Theory. ACM Transactions on Software Engineering and Methodology (TOSEM) To appear.  Peter C. Rigby Daniel M German Laura Cowen and Margaret-Anne Storey. 2014. Peer Review on Open Source Software Projects: Parameters Statistical Models and Theory. ACM Transactions on Software Engineering and Methodology (TOSEM) To appear.","DOI":"10.1145\/2594458"},{"key":"e_1_3_2_1_46_1","unstructured":"Devjeet Roy Sarah Fakhoury and Venera Arnaoudova. 2021. Online Replication Package. https:\/\/github.com\/devjeetr\/Re-assessing-automatic-evaluation-metrics-for-source-code-summarization-tasks  Devjeet Roy Sarah Fakhoury and Venera Arnaoudova. 2021. Online Replication Package. https:\/\/github.com\/devjeetr\/Re-assessing-automatic-evaluation-metrics-for-source-code-summarization-tasks"},{"key":"e_1_3_2_1_47_1","unstructured":"Devjeet Roy Ziyi Zhang Venera Arnaoudova A Panichella Sebastiano Panichella Danielle Gonzalez and Mehdi Mirakhorli. 2020. DeepTC-Enhancer: Improving the Readability of Automatically Generated Tests.  Devjeet Roy Ziyi Zhang Venera Arnaoudova A Panichella Sebastiano Panichella Danielle Gonzalez and Mehdi Mirakhorli. 2020. DeepTC-Enhancer: Improving the Readability of Automatically Generated Tests."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2019.8851751"},{"key":"e_1_3_2_1_49_1","first-page":"29","article-title":"BLEU deconstructed: Designing a better MT evaluation metric","volume":"4","author":"Song Xingyi","year":"2013","unstructured":"Xingyi Song , Trevor Cohn , and Lucia Specia . 2013 . BLEU deconstructed: Designing a better MT evaluation metric . International Journal of Computational Linguistics and Applications , 4 , 2 (2013), 29 \u2013 44 . Xingyi Song, Trevor Cohn, and Lucia Specia. 2013. BLEU deconstructed: Designing a better MT evaluation metric. International Journal of Computational Linguistics and Applications, 4, 2 (2013), 29\u201344.","journal-title":"International Journal of Computational Linguistics and Applications"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2931579"},{"key":"e_1_3_2_1_51_1","volume-title":"Proceedings of the Fifth Conference on Machine Translation. Association for Computational Linguistics, Online. 928\u2013933","author":"Stanchev Peter","year":"2020","unstructured":"Peter Stanchev , Weiyue Wang , and Hermann Ney . 2020 . Towards a Better Evaluation of Metrics for Machine Translation . In Proceedings of the Fifth Conference on Machine Translation. Association for Computational Linguistics, Online. 928\u2013933 . Peter Stanchev, Weiyue Wang, and Hermann Ney. 2020. Towards a Better Evaluation of Metrics for Machine Translation. In Proceedings of the Fifth Conference on Machine Translation. Association for Computational Linguistics, Online. 928\u2013933."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1145\/3387904.3389258"},{"key":"e_1_3_2_1_53_1","volume-title":"Automatic Generation of Program Comments Based on Problem Statements for Computational Thinking. In 2019 8th International Congress on Advanced Applied Informatics (IIAI-AAI). 629\u2013634","author":"Takahashi Akiyoshi","year":"2019","unstructured":"Akiyoshi Takahashi , Hiromitsu Shiina , and Nobuyuki Kobayashi . 2019 . Automatic Generation of Program Comments Based on Problem Statements for Computational Thinking. In 2019 8th International Congress on Advanced Applied Informatics (IIAI-AAI). 629\u2013634 . Akiyoshi Takahashi, Hiromitsu Shiina, and Nobuyuki Kobayashi. 2019. Automatic Generation of Program Comments Based on Problem Statements for Computational Thinking. In 2019 8th International Congress on Advanced Applied Informatics (IIAI-AAI). 629\u2013634."},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3238147.3238206"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.3011744"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2020.2979701"},{"key":"e_1_3_2_1_57_1","unstructured":"Bolin Wei Ge Li Xin Xia Zhiyi Fu and Zhi Jin. 2019. Code generation as a dual task of code summarization. In Advances in Neural Information Processing Systems. 6563\u20136573.  Bolin Wei Ge Li Xin Xia Zhiyi Fu and Zhi Jin. 2019. Code generation as a dual task of code summarization. In Advances in Neural Information Processing Systems. 6563\u20136573."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.5555\/271581.271584"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.1109\/APSEC.2018.00101"},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3366423.3380295"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.350"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.18293\/SEKE2018-191"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3377811.3380383"},{"key":"e_1_3_2_1_64_1","volume-title":"Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675.","author":"Zhang Tianyi","year":"2019","unstructured":"Tianyi Zhang , Varsha Kishore , Felix Wu , Kilian Q Weinberger , and Yoav Artzi . 2019 . Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675. Tianyi Zhang, Varsha Kishore, Felix Wu, Kilian Q Weinberger, and Yoav Artzi. 2019. Bertscore: Evaluating text generation with bert. arXiv preprint arXiv:1904.09675."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jss.2019.07.087"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1002\/spe.2893"}],"event":{"name":"ESEC\/FSE '21: 29th ACM Joint European Software Engineering Conference and Symposium on the Foundations of Software Engineering","location":"Athens Greece","acronym":"ESEC\/FSE '21","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering"]},"container-title":["Proceedings of the 29th ACM Joint Meeting on European Software Engineering Conference and Symposium on the Foundations of Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3468264.3468588","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3468264.3468588","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T21:24:51Z","timestamp":1750195491000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3468264.3468588"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,8,18]]},"references-count":66,"alternative-id":["10.1145\/3468264.3468588","10.1145\/3468264"],"URL":"https:\/\/doi.org\/10.1145\/3468264.3468588","relation":{},"subject":[],"published":{"date-parts":[[2021,8,18]]},"assertion":[{"value":"2021-08-18","order":2,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}