{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T08:42:21Z","timestamp":1768380141931,"version":"3.49.0"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100000038","name":"Natural Sciences and Engineering Research Council of Canada","doi-asserted-by":"crossref","award":["RGPIN- 2019-05071"],"award-info":[{"award-number":["RGPIN- 2019-05071"]}],"id":[{"id":"10.13039\/501100000038","id-type":"DOI","asserted-by":"crossref"}]},{"DOI":"10.13039\/501100019117","name":"Vector Institute","doi-asserted-by":"publisher","award":["Vector Scholarship in Artificial Intelligence"],"award-info":[{"award-number":["Vector Scholarship in Artificial Intelligence"]}],"id":[{"id":"10.13039\/501100019117","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Empir Software Eng"],"published-print":{"date-parts":[[2026,3]]},"DOI":"10.1007\/s10664-025-10768-1","type":"journal-article","created":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T05:31:28Z","timestamp":1764999088000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Output format biases in the evaluation of large language models for code translation"],"prefix":"10.1007","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-1008-1073","authenticated-orcid":false,"given":"Marcos","family":"Macedo","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yuan","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Filipe R.","family":"Cogo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bram","family":"Adams","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,12,6]]},"reference":[{"key":"10768_CR1","unstructured":"(2024) Airoboros 13B HF fp16. https:\/\/huggingface.co\/TheBloke\/airoboros-13B-HF. Accessed: date-of-access"},{"key":"10768_CR2","unstructured":"(2024) LLama 2. https:\/\/ai.meta.com\/research\/publications\/llama-2-open-foundation-and-fine-tuned-chat-models\/. Accessed: date-of-access"},{"key":"10768_CR3","unstructured":"(2024) Wizard-Vicuna-13B-Uncensored float16 HF. https:\/\/huggingface.co\/TheBloke\/Wizard-Vicuna-13B-Uncensored-HF. Accessed: date-of-access"},{"key":"10768_CR4","unstructured":"Achiam J, Adler S, Agarwal S, Ahmad L, Akkaya I, Aleman FL, Almeida D, Altenschmidt J, Altman S, Anadkat S et\u00a0al (2023) GPT-4 Technical report. arXiv:2303.08774"},{"key":"10768_CR5","unstructured":"Aggarwal K, Salameh M, Hindle A (2015) Using machine translation for converting python 2 to python 3 code. Technical Report, PeerJ PrePrints"},{"key":"10768_CR6","unstructured":"Chen X, Liu C, Song D (2018) Tree-to-tree neural networks for program translation. Adv Neural Inf Process Syst 31"},{"key":"10768_CR7","doi-asserted-by":"crossref","unstructured":"Eghbali A, Pradel M (2022) CrystalBLEU: precisely and efficiently measuring the similarity of code. In: Proceedings of the 37th IEEE\/ACM International conference on automated software engineering, pp 1\u201312","DOI":"10.1145\/3551349.3556903"},{"key":"10768_CR8","doi-asserted-by":"crossref","unstructured":"Fan A, Gokkaya B, Harman M, Lyubarskiy M, Sengupta S, Yoo S, Zhang JM (2023) Large language models for software engineering: survey and open problems. arXiv:2310.03533","DOI":"10.1109\/ICSE-FoSE59343.2023.00008"},{"key":"10768_CR9","doi-asserted-by":"crossref","unstructured":"Feng S, Chen C (2023) Prompting is all your need: automated android bug replay with large language models. arXiv:2306.01987","DOI":"10.1145\/3597503.3608137"},{"key":"10768_CR10","doi-asserted-by":"crossref","unstructured":"Feng Z, Guo D, Tang D, Duan N, Feng X, Gong M, Shou L, Qin B, Liu T, Jiang D et\u00a0al. (2020) Codebert: a pre-trained model for programming and natural languages. arXiv:2002.08155","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"10768_CR11","doi-asserted-by":"crossref","unstructured":"Gao S, Wen X-C, Gao C, Wang W, Lyu MR (2023) Constructing effective in-context demonstration for code intelligence tasks: an empirical study. arXiv:2304.07575","DOI":"10.1109\/ASE56229.2023.00109"},{"key":"10768_CR12","doi-asserted-by":"crossref","unstructured":"Geng M, Wang S, Dong D, Wang H, Li G, Jin Z, Mao X, Liao X (2024) Large language models are few-shot summarizers: multi-intent comment generation via in-context learning","DOI":"10.1145\/3597503.3608134"},{"key":"10768_CR13","doi-asserted-by":"publisher","unstructured":"Ibrahimzada AR, Ke K, Pawagi M, Abid MS, Pan R, Sinha S, Jabbarvand R (2025) AlphaTrans: a neuro-symbolic compositional approach for repository-level code translation and validation. arXiv:2410.24117 [cs]. https:\/\/doi.org\/10.1145\/3729379","DOI":"10.1145\/3729379"},{"key":"10768_CR14","doi-asserted-by":"crossref","unstructured":"Jiao M, Yu T, Li X, Qiu G, Gu X, Shen B ( 2023) On the evaluation of neural code translation: taxonomy and benchmark. In: 2023 38th IEEE\/ACM International conference on automated software engineering (ASE). IEEE, pp 1529\u20131541","DOI":"10.1109\/ASE56229.2023.00114"},{"key":"10768_CR15","doi-asserted-by":"crossref","unstructured":"Karaivanov S, Raychev V, Vechev M (2014) Phrase-based statistical translation of programming languages. In: Proceedings of the 2014 ACM International symposium on new ideas, new paradigms, and reflections on programming & software, pp 173\u2013184","DOI":"10.1145\/2661136.2661148"},{"key":"10768_CR16","doi-asserted-by":"publisher","unstructured":"Kwon W, Li Z, Zhuang S, Sheng Y, Zheng L, Yu CH, Gonzalez JE, Zhang H, Stoica I (2023) Efficient memory management for large language model serving with pagedattention. arXiv:2309.06180 [cs].https:\/\/doi.org\/10.48550\/arXiv.2309.06180","DOI":"10.48550\/arXiv.2309.06180"},{"key":"10768_CR17","first-page":"14967","volume":"34","author":"M-A Lachaux","year":"2021","unstructured":"Lachaux M-A, Roziere B, Szafraniec M, Lample G (2021) DOBF: A deobfuscation pre-training objective for programming languages. Adv Neural Inf Process Syst 34:14967\u201314979","journal-title":"Adv Neural Inf Process Syst"},{"key":"10768_CR18","doi-asserted-by":"crossref","unstructured":"Li T-O, Zong W, Wang Y, Tian H, Wang Y, Cheung S-C, Kramer J (2023a) Nuances are the key: unlocking ChatGPT to find failure-inducing tests with differential prompting. In: 2023 38th IEEE\/ACM International conference on automated software engineering (ASE). IEEE, pp 14\u201326","DOI":"10.1109\/ASE56229.2023.00089"},{"key":"10768_CR19","doi-asserted-by":"publisher","unstructured":"Li R, Allal LB, Zi Y, Muennighoff N, Kocetkov D, Mou C, Marone M, Akiki C, Li J, Chim J, Liu Q, Zheltonozhskii E, Zhuo TY, Wang T, Dehaene O, Davaadorj M, Lamy-Poirier J, Monteiro J, Shliazhko O, Gontier N, Meade N, Zebaze A, Yee M-H, Umapathi LK, Zhu J, Lipkin B, Oblokulov M, Wang Z, Murthy R, Stillerman J, Patel SS, Abulkhanov D, Zocca M, Dey M, Zhang Z, Fahmy N, Bhattacharyya U, Yu W, Singh S, Luccioni S, Villegas P, Kunakov M, Zhdanov F, Romero M, Lee T, Timor N, Ding J, Schlesinger C, Schoelkopf H, Ebert J, Dao T, Mishra M, Gu A, Robinson J, Anderson CJ, Dolan-Gavitt B, Contractor D, Reddy S, Fried D, Bahdanau D, Jernite Y, Ferrandis CM, Hughes S, Wolf T, Guha A, von Werra L, de Vries H (2023b) StarCoder: may the source be with you! arXiv:2305.06161 [cs]. https:\/\/doi.org\/10.48550\/arXiv.2305.06161","DOI":"10.48550\/arXiv.2305.06161"},{"key":"10768_CR20","unstructured":"Lu S, Guo D, Ren S, Huang J, Svyatkovskiy A, Blanco A, Clement C, Drain D, Jiang D, Tang D et\u00a0al (2021) Codexglue: A machine learning benchmark dataset for code understanding and generation. arXiv:2102.04664"},{"key":"10768_CR21","doi-asserted-by":"publisher","unstructured":"Luo Z, Xu C, Zhao P, Sun Q, Geng X, Hu W, Tao C, Ma J, Lin Q, Jiang D (2023) WizardCoder: empowering code large language models with evol-instruct. arXiv:2306.08568 [cs]. https:\/\/doi.org\/10.48550\/arXiv.2306.08568","DOI":"10.48550\/arXiv.2306.08568"},{"key":"10768_CR22","doi-asserted-by":"crossref","unstructured":"Macedo M, Tian Y, Cogo F, Adams B (2024a) Exploring the impact of the output format on the evaluation of large language models for code translation. In: Proceedings of the 2024 IEEE\/ACM First International conference on ai foundation models and software engineering, pp 57\u201368","DOI":"10.1145\/3650105.3652301"},{"key":"10768_CR23","doi-asserted-by":"crossref","unstructured":"Macedo M, Tian Y, Nie P, Cogo FR, Adams B (2024b) InterTrans: leveraging transitive intermediate translations to enhance LLM-based code translation. arXiv:2411.01063","DOI":"10.1109\/ICSE55347.2025.00236"},{"key":"10768_CR24","unstructured":"Mistral AI Team (2023) Mixtral of experts. https:\/\/mistral.ai\/news\/mixtral-of-experts\/. Accessed 13 Jan 2024"},{"key":"10768_CR25","doi-asserted-by":"crossref","unstructured":"Nguyen AT, Nguyen TT, Nguyen TN (2013) Lexical statistical machine translation for language migration. In: Proceedings of the 2013 9th joint meeting on foundations of software engineering, pp 651\u2013654","DOI":"10.1145\/2491411.2494584"},{"key":"10768_CR26","doi-asserted-by":"crossref","unstructured":"Nguyen AT, Nguyen TT, Nguyen TN (2014) Migrating code with statistical machine translation. In: Companion Proceedings of the 36th International conference on software engineering, pp 544\u2013547","DOI":"10.1145\/2591062.2591072"},{"key":"10768_CR27","unstructured":"Nijkamp E, Pang B, Hayashi H, Tu L, Wang H, Zhou Y, Savarese S, Xiong C (2022) CodeGen: An open large language model for code with multi-turn program synthesis. In: The eleventh international conference on learning representations"},{"key":"10768_CR28","doi-asserted-by":"crossref","unstructured":"Pan R, Ibrahimzada AR, Krishna R, Sankar D, Wassi LP, Merler M, Sobolev B, Pavuluri R, Sinha S, Jabbarvand R (2024) Lost in translation: a study of bugs introduced by large language models while translating code. In: Proceedings of the IEEE\/ACM 46th International conference on software engineering, pp 1\u201313","DOI":"10.1145\/3597503.3639226"},{"key":"10768_CR29","doi-asserted-by":"publisher","unstructured":"Papineni K, Roukos S, Ward T, Zhu W-J (2001) BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th annual meeting on association for computational linguistics - ACL \u201902. Association for Computational Linguistics, Philadelphia, Pennsylvania, 311. https:\/\/doi.org\/10.3115\/1073083.1073135","DOI":"10.3115\/1073083.1073135"},{"key":"10768_CR30","doi-asserted-by":"publisher","unstructured":"Puri R, Kung DS, Janssen G, Zhang W, Domeniconi G, Zolotov V, Dolby J, Chen J, Choudhury M, Decker L,Thost V, Buratti L, Pujar S, Ramji S, Finkler U, Malaika S, Reiss F (2021) CodeNet: A large-scale AI for code dataset for learning a diversity of coding tasks. arXiv:2105.12655 [cs]. https:\/\/doi.org\/10.48550\/arXiv.2105.12655","DOI":"10.48550\/arXiv.2105.12655"},{"key":"10768_CR31","unstructured":"Ren S, Guo D, Lu S, Zhou L, Liu S, Tang D, Sundaresan N, Zhou M, Blanco A, Ma S (2020) CodeBLEU: a method for automatic evaluation of code synthesis. http:\/\/arxiv.org\/abs\/2009.10297arXiv:2009.10297 [cs]"},{"key":"10768_CR32","first-page":"20601","volume":"33","author":"B Roziere","year":"2020","unstructured":"Roziere B, Lachaux M-A, Chanussot L, Lample G (2020) Unsupervised translation of programming languages. Adv Neural Inf Process Syst 33:20601\u201320611","journal-title":"Adv Neural Inf Process Syst"},{"key":"10768_CR33","doi-asserted-by":"publisher","unstructured":"Rozi\u00e8re B, Gehring J, Gloeckle F, Sootla S, Gat I, Tan XE, Adi Y, Liu J, Remez T, Rapin J, Kozhevnikov A, Evtimov I, Bitton J, Bhatt M, Ferrer CC, Grattafiori A, Xiong W, D\u00e9fossez A, Copet J, Azhar F, Touvron H, Martin L, Usunier N, Scialom T, Synnaeve G (2023) Code Llama: open foundation models for code. arXiv:2308.12950 [cs]. https:\/\/doi.org\/10.48550\/arXiv.2308.12950","DOI":"10.48550\/arXiv.2308.12950"},{"key":"10768_CR34","unstructured":"Roziere B, Zhang JM, Charton F, Harman M, Synnaeve G, Lample G (2021) Leveraging automated unit tests for unsupervised code translation. arXiv:2110.06773"},{"key":"10768_CR35","doi-asserted-by":"publisher","unstructured":"Stol KJ, Ralph P, Fitzgerald B (2016) Grounded theory in software engineering research: a critical review and guidelines. In: Proceedings of the 38th International Conference on Software Engineering (ICSE \u201916). Association for Computing Machinery, New York, NY, USA, 120\u2013131. https:\/\/doi.org\/10.1145\/2884781.2884833","DOI":"10.1145\/2884781.2884833"},{"key":"10768_CR36","doi-asserted-by":"publisher","unstructured":"Szafraniec M, Roziere B, Leather H, Charton F, Labatut P, Synnaeve G (2023) Code translation with compiler representations. arXiv:2207.03578 [cs]. https:\/\/doi.org\/10.48550\/arXiv.2207.03578","DOI":"10.48550\/arXiv.2207.03578"},{"key":"10768_CR37","doi-asserted-by":"crossref","unstructured":"Wang Y, Wang W, Joty S, Hoi SCH (2021) Codet5: Identifier-aware unified pre-trained encoder-decoder models for code understanding and generation. arXiv:2109.00859","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"10768_CR38","doi-asserted-by":"publisher","unstructured":"Weisz JD, Muller M, Houde S, Richards J, Ross SI, Martinez F, Agarwal M, Talamadupula K (2021) Perfection not required? Human-AI Partnerships in Code Translation. In: 26th International conference on intelligent user interfaces. ACM, College Station TX USA, pp 402\u2013412. https:\/\/doi.org\/10.1145\/3397481.3450656","DOI":"10.1145\/3397481.3450656"},{"key":"10768_CR39","doi-asserted-by":"publisher","unstructured":"Wei Y, Wang Z, Liu J, Ding Y, Zhang (2023) Magicoder: source code is all you need. arXiv:2312.02120 [cs]. https:\/\/doi.org\/10.48550\/arXiv.2312.02120","DOI":"10.48550\/arXiv.2312.02120"},{"key":"10768_CR40","doi-asserted-by":"publisher","unstructured":"Wolf T, Debut L, Sanh V, Chaumond J, Delangue C, Moi A, Cistac P, Rault T, Louf R, Funtowicz M, Davison J, Shleifer S, von Platen P, Ma C, Jernite Y, Plu J, Xu C, Le Scao T, Gugger S, Drame M, Lhoest Q, Rush AM (2020) HuggingFace\u2019s transformers: state-of-the-art natural language processing. arXiv:1910.03771 [cs]. https:\/\/doi.org\/10.48550\/arXiv.1910.03771","DOI":"10.48550\/arXiv.1910.03771"},{"key":"10768_CR41","doi-asserted-by":"crossref","unstructured":"Yang Z, Liu F, Yu Z, Keung JW, Li J, Liu S, Hong Y, Ma X, Jin Z, Li G (2024) Exploring and unleashing the power of large language models in automated code translation. Proceedings of the ACM on Software Engineering 1. FSE 1585\u20131608","DOI":"10.1145\/3660778"},{"key":"10768_CR42","doi-asserted-by":"crossref","unstructured":"Zheng Q, Xia X, Zou X, Dong Y, Wang S, Xue Y, Wang Z, Shen L, Wang A, Li Y et\u00a0al (2023) Codegeex: A pre-trained model for code generation with multilingual evaluations on humaneval-x. arXiv:2303.17568","DOI":"10.1145\/3580305.3599790"},{"key":"10768_CR43","doi-asserted-by":"crossref","unstructured":"Zhu M, Suresh K, Reddy CK (2022a) Multilingual code snippets training for program translation. In: Proceedings of the AAAI conference on artificial intelligence vol 36, pp 11783\u201311790","DOI":"10.1609\/aaai.v36i10.21434"},{"key":"10768_CR44","unstructured":"Zhu M, Jain A, Suresh K, Ravindran R, Tipirneni S, Reddy CK (2022b) Xlcost: A benchmark dataset for cross-lingual code intelligence. arXiv:2206.08474"}],"container-title":["Empirical Software Engineering"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-025-10768-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10664-025-10768-1","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10664-025-10768-1.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,14]],"date-time":"2026-01-14T04:33:42Z","timestamp":1768365222000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10664-025-10768-1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":44,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2026,3]]}},"alternative-id":["10768"],"URL":"https:\/\/doi.org\/10.1007\/s10664-025-10768-1","relation":{},"ISSN":["1382-3256","1573-7616"],"issn-type":[{"value":"1382-3256","type":"print"},{"value":"1573-7616","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,12,6]]},"assertion":[{"value":"23 October 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 November 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 December 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"not applicable","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical Approval"}},{"value":"not applicable","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Informed consent"}},{"value":"The authors declare that they have no conflict of interest.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of Interest"}},{"value":"Clinical trial number: not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Clinical Trial Number in the manuscript"}}],"article-number":"41"}}