{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,8]],"date-time":"2026-03-08T00:48:20Z","timestamp":1772930900630,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":45,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,27]],"date-time":"2024-10-27T00:00:00Z","timestamp":1729987200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"Virginia Commonwealth Cyber Initiative (CCI) and the Office of Naval Research","award":["N00014-22-1-2057"],"award-info":[{"award-number":["N00014-22-1-2057"]}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-1929701"],"award-info":[{"award-number":["CNS-1929701"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,27]]},"DOI":"10.1145\/3691620.3695524","type":"proceedings-article","created":{"date-parts":[[2024,10,18]],"date-time":"2024-10-18T15:39:19Z","timestamp":1729265959000},"page":"1545-1556","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Semi-Supervised Code Translation Overcoming the Scarcity of Parallel Code Data"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9128-076X","authenticated-orcid":false,"given":"Ming","family":"Zhu","sequence":"first","affiliation":[{"name":"Virginia Tech, Blacksburg, Virginia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2808-7788","authenticated-orcid":false,"given":"Mohimenul","family":"Karim","sequence":"additional","affiliation":[{"name":"Virginia Tech, Blacksburg, Virginia, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1238-772X","authenticated-orcid":false,"given":"Ismini","family":"Lourentzou","sequence":"additional","affiliation":[{"name":"University of Illinois Urbana - Champaign, Urbana - Champaign, Illinois, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8969-2792","authenticated-orcid":false,"given":"Daphne","family":"Yao","sequence":"additional","affiliation":[{"name":"Virginia Tech, Blacksburg, Virginia, USA"}]}],"member":"320","published-online":{"date-parts":[[2024,10,27]]},"reference":[{"key":"e_1_3_2_1_2_1","volume-title":"A Learning Algorithm for Boltzmann Machines. Cognitive science 9, 1","author":"Ackley David H","year":"1985","unstructured":"David H Ackley, Geoffrey E Hinton, and Terrence J Sejnowski. 1985. A Learning Algorithm for Boltzmann Machines. Cognitive science 9, 1 (1985), 147--169."},{"key":"e_1_3_2_1_3_1","volume-title":"Using Document Similarity Methods to Create Parallel Datasets for Code Translation. arXiv:2110.05423","author":"Agarwal Mayank","year":"2021","unstructured":"Mayank Agarwal, Kartik Talamadupula, Fernando Martinez, Stephanie Houde, Michael Muller, John Richards, Steven I Ross, and Justin D Weisz. 2021. Using Document Similarity Methods to Create Parallel Datasets for Code Translation. arXiv:2110.05423 (2021)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.naacl-main.211"},{"key":"e_1_3_2_1_5_1","volume-title":"Summarize and Generate to Back-translate: Unsupervised Translation of Programming Languages. arXiv:2205.11116","author":"Ahmad Wasi Uddin","year":"2022","unstructured":"Wasi Uddin Ahmad, Saikat Chakraborty, Baishakhi Ray, and Kai-Wei Chang. 2022. Summarize and Generate to Back-translate: Unsupervised Translation of Programming Languages. arXiv:2205.11116 (2022)."},{"key":"e_1_3_2_1_6_1","volume-title":"Saikat Chakraborty, and Kai-Wei Chang.","author":"Ahmad Wasi Uddin","year":"2021","unstructured":"Wasi Uddin Ahmad, Md Golam Rahman Tushar, Saikat Chakraborty, and Kai-Wei Chang. 2021. AVATAR: A Parallel Corpus for Java-Python Program Translation. arXiv:2108.11590 (2021)."},{"key":"e_1_3_2_1_7_1","volume-title":"An Effective Approach to Unsupervised Machine Translation. arXiv:1902.01313","author":"Artetxe Mikel","year":"2019","unstructured":"Mikel Artetxe, Gorka Labaka, and Eneko Agirre. 2019. An Effective Approach to Unsupervised Machine Translation. arXiv:1902.01313 (2019)."},{"key":"e_1_3_2_1_8_1","volume-title":"Unsupervised Neural Machine Translation. arXiv:1710.11041","author":"Artetxe Mikel","year":"2017","unstructured":"Mikel Artetxe, Gorka Labaka, Eneko Agirre, and Kyunghyun Cho. 2017. Unsupervised Neural Machine Translation. arXiv:1710.11041 (2017)."},{"key":"e_1_3_2_1_9_1","volume-title":"Garnett (Eds.)","volume":"31","author":"Chen Xinyun","year":"2018","unstructured":"Xinyun Chen, Chang Liu, and Dawn Song. 2018. Tree-to-tree Neural Networks for Program Translation. In Advances in Neural Information Processing Systems, S. Bengio, H. Wallach, H. Larochelle, K. Grauman, N. Cesa-Bianchi, and R. Garnett (Eds.), Vol. 31. Curran Associates, Inc. https:\/\/proceedings.neurips.cc\/paper\/2018\/file\/d759175de8ea5b1d9a2660e45554894f-Paper.pdf"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N19-1423"},{"key":"e_1_3_2_1_11_1","volume-title":"Understanding Back-Translation at Scale. arXiv:1808.09381","author":"Edunov Sergey","year":"2018","unstructured":"Sergey Edunov, Myle Ott, Michael Auli, and David Grangier. 2018. Understanding Back-Translation at Scale. arXiv:1808.09381 (2018)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"e_1_3_2_1_13_1","volume-title":"InCoder: A Generative Model for Code Infilling and Synthesis. arXiv:2204.05999","author":"Fried Daniel","year":"2022","unstructured":"Daniel Fried, Armen Aghajanyan, Jessy Lin, Sida Wang, Eric Wallace, Freda Shi, Ruiqi Zhong, Wen-tau Yih, Luke Zettlemoyer, and Mike Lewis. 2022. InCoder: A Generative Model for Code Infilling and Synthesis. arXiv:2204.05999 (2022)."},{"key":"e_1_3_2_1_14_1","unstructured":"Daya Guo Shuo Ren Shuai Lu Zhangyin Feng Duyu Tang Shujie Liu Long Zhou Nan Duan Alexey Svyatkovskiy Shengyu Fu et al. 2020. GraphCodeBERT: Pre-training Code Representations with Data Flow. arXiv:2009.08366 (2020)."},{"key":"e_1_3_2_1_15_1","volume-title":"Distilling the Knowledge in a Neural Network. arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the Knowledge in a Neural Network. arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.emnlp-main.672"},{"key":"e_1_3_2_1_17_1","volume-title":"Codesearchnet Challenge: Evaluating the State of Semantic Code Search. arXiv:1909.09436","author":"Husain Hamel","year":"2019","unstructured":"Hamel Husain, Ho-Hsiang Wu, Tiferet Gazit, Miltiadis Allamanis, and Marc Brockschmidt. 2019. Codesearchnet Challenge: Evaluating the State of Semantic Code Search. arXiv:1909.09436 (2019)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2661136.2661148"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICAC3N56670.2022.10074182"},{"key":"e_1_3_2_1_20_1","volume-title":"Cross-lingual Language Model Pretraining. arXiv e-prints","author":"Lample Guillaume","year":"2019","unstructured":"Guillaume Lample and Alexis Conneau. 2019. Cross-lingual Language Model Pretraining. arXiv e-prints (2019), arXiv-1901."},{"key":"e_1_3_2_1_21_1","volume-title":"Unsupervised Machine Translation Using Monolingual Corpora Only. In International Conference on Learning Representations.","author":"Lample Guillaume","year":"2018","unstructured":"Guillaume Lample, Alexis Conneau, Ludovic Denoyer, and Marc'Aurelio Ranzato. 2018. Unsupervised Machine Translation Using Monolingual Corpora Only. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00343"},{"key":"e_1_3_2_1_24_1","unstructured":"Yinhan Liu Myle Ott Naman Goyal Jingfei Du Mandar Joshi Danqi Chen Omer Levy Mike Lewis Luke Zettlemoyer and Veselin Stoyanov. 2019. RoBERTa: A Robustly Optimized BERT Pretraining Approach. (2019)."},{"key":"e_1_3_2_1_25_1","volume-title":"CodeXGLUE: A Machine Learning Benchmark Dataset for Code Understanding and Generation. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1).","author":"Lu Shuai","year":"2021","unstructured":"Shuai Lu, Daya Guo, Shuo Ren, Junjie Huang, Alexey Svyatkovskiy, Ambrosio Blanco, Colin Clement, Dawn Drain, Daxin Jiang, Duyu Tang, et al. 2021. CodeXGLUE: A Machine Learning Benchmark Dataset for Code Understanding and Generation. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2491411.2494584"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE.2015.74"},{"key":"e_1_3_2_1_28_1","volume-title":"CodeGen: An Open Large Language Model for Code with Multi-Turn Program Synthesis. In The Eleventh International Conference on Learning Representations.","author":"Nijkamp Erik","year":"2022","unstructured":"Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, and Caiming Xiong. 2022. CodeGen: An Open Large Language Model for Code with Multi-Turn Program Synthesis. In The Eleventh International Conference on Learning Representations."},{"key":"e_1_3_2_1_29_1","unstructured":"Long Ouyang Jeffrey Wu Xu Jiang Diogo Almeida Carroll Wainwright Pamela Mishkin Chong Zhang Sandhini Agarwal Katarina Slama Alex Ray et al. 2022. Training Language Models to Follow Instructions with Human Feedback. Advances in neural information processing systems 35 (2022) 27730--27744."},{"key":"e_1_3_2_1_30_1","volume-title":"Rahul Krishna, Divya Sankar, Lambert Pouguem Wassi, Michele Merler, Boris Sobolev, Raju Pavuluri, Saurabh Sinha, and Reyhaneh Jabbarvand.","author":"Pan Rangeet","year":"2023","unstructured":"Rangeet Pan, Ali Reza Ibrahimzada, Rahul Krishna, Divya Sankar, Lambert Pouguem Wassi, Michele Merler, Boris Sobolev, Raju Pavuluri, Saurabh Sinha, and Reyhaneh Jabbarvand. 2023. Understanding the Effectiveness of Large Language Models in Code Translation. arXiv:2308.03109 (2023)."},{"key":"e_1_3_2_1_31_1","unstructured":"Ruchir Puri David S Kung Geert Janssen Wei Zhang Giacomo Domeniconi Vladmir Zolotov Julian Dolby Jie Chen Mihir Choudhury Lindsey Decker et al. 2021. Project CodeNet: A Large-Scale AI for Code Dataset for Learning a Diversity of Coding Tasks. arXiv:2105.12655 (2021)."},{"key":"e_1_3_2_1_32_1","first-page":"1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research 21, 140 (2020), 1--67.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3319535.3345659"},{"key":"e_1_3_2_1_34_1","volume-title":"Yossi Adi, Jingyu Liu, Tal Remez, J\u00e9r\u00e9my Rapin, et al.","author":"Roziere Baptiste","year":"2023","unstructured":"Baptiste Roziere, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, J\u00e9r\u00e9my Rapin, et al. 2023. Code Llama: Open Foundation Models for Code. arXiv:2308.12950 (2023)."},{"key":"e_1_3_2_1_35_1","unstructured":"Baptiste Roziere Marie-Anne Lachaux Lowik Chanussot and Guillaume Lample. 2020. Unsupervised Translation of Programming Languages.. In NeurIPS."},{"key":"e_1_3_2_1_36_1","volume-title":"DOBF: A Deobfuscation Pre-Training Objective for Programming Languages. arXiv:2102.07492","author":"Roziere Baptiste","year":"2021","unstructured":"Baptiste Roziere, Marie-Anne Lachaux, Marc Szafraniec, and Guillaume Lample. 2021. DOBF: A Deobfuscation Pre-Training Objective for Programming Languages. arXiv:2102.07492 (2021)."},{"key":"e_1_3_2_1_37_1","volume-title":"Leveraging Automated Unit Tests for Unsupervised Code Translation. In International Conference on Learning Representations.","author":"Roziere Baptiste","year":"2021","unstructured":"Baptiste Roziere, Jie Zhang, Francois Charton, Mark Harman, Gabriel Synnaeve, and Guillaume Lample. 2021. Leveraging Automated Unit Tests for Unsupervised Code Translation. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_38_1","volume-title":"Patrick Labatut, and Gabriel Synnaeve.","author":"Szafraniec Marc","year":"2022","unstructured":"Marc Szafraniec, Baptiste Roziere, Hugh Leather Francois Charton, Patrick Labatut, and Gabriel Synnaeve. 2022. Code Translation with Compiler Representations. arXiv:2207.03578 (2022)."},{"key":"e_1_3_2_1_39_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open Foundation and Fine-tuned Chat Models. arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_1_40_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N Gomez \u0141ukasz Kaiser and Illia Polosukhin. 2017. Attention is All You Need. In Advances in neural information processing systems. 5998--6008."},{"key":"e_1_3_2_1_41_1","volume-title":"Nghi DQ Bui, Junnan Li, and Steven CH Hoi.","author":"Wang Yue","year":"2023","unstructured":"Yue Wang, Hung Le, Akhilesh Deepak Gotmare, Nghi DQ Bui, Junnan Li, and Steven CH Hoi. 2023. CodeT5+: Open Code Large Language Models for Code Understanding and Generation. arXiv:2305.07922 (2023)."},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1145\/3625291"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2023.3265362"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599790"},{"key":"e_1_3_2_1_46_1","volume-title":"Multilingual Code Snippets Training for Program Translation. In 36th AAAI Conference on Artificial Intelligence (AAAI).","author":"Zhu Ming","year":"2022","unstructured":"Ming Zhu, Karthik Suresh, and Chandan K Reddy. 2022. Multilingual Code Snippets Training for Program Translation. In 36th AAAI Conference on Artificial Intelligence (AAAI)."}],"event":{"name":"ASE '24: 39th IEEE\/ACM International Conference on Automated Software Engineering","location":"Sacramento CA USA","acronym":"ASE '24","sponsor":["SIGAI ACM Special Interest Group on Artificial Intelligence","SIGSOFT ACM Special Interest Group on Software Engineering","IEEE CS"]},"container-title":["Proceedings of the 39th IEEE\/ACM International Conference on Automated Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3691620.3695524","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/abs\/10.1145\/3691620.3695524","content-type":"text\/html","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3691620.3695524","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:09:39Z","timestamp":1750295379000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3691620.3695524"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,27]]},"references-count":45,"alternative-id":["10.1145\/3691620.3695524","10.1145\/3691620"],"URL":"https:\/\/doi.org\/10.1145\/3691620.3695524","relation":{},"subject":[],"published":{"date-parts":[[2024,10,27]]},"assertion":[{"value":"2024-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}