{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T07:11:42Z","timestamp":1767337902257,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":78,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T00:00:00Z","timestamp":1726012800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,9,11]]},"DOI":"10.1145\/3650212.3680399","type":"proceedings-article","created":{"date-parts":[[2024,9,11]],"date-time":"2024-09-11T11:44:25Z","timestamp":1726055065000},"page":"1785-1797","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["Calico: Automated Knowledge Calibration and Diagnosis for Elevating AI Mastery in Code Tasks"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0001-3179-7433","authenticated-orcid":false,"given":"Yuxin","family":"Qiu","sequence":"first","affiliation":[{"name":"University of California at Riverside, Riverside, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3527-7396","authenticated-orcid":false,"given":"Jie","family":"Hu","sequence":"additional","affiliation":[{"name":"University of California at Riverside, Riverside, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1721-0907","authenticated-orcid":false,"given":"Qian","family":"Zhang","sequence":"additional","affiliation":[{"name":"University of California at Riverside, Riverside, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8942-7742","authenticated-orcid":false,"given":"Heng","family":"Yin","sequence":"additional","affiliation":[{"name":"University of California at Riverside, Riverside, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,9,11]]},"reference":[{"key":"e_1_3_2_1_1_1","unstructured":"2024. https:\/\/tree-sitter.github.io\/tree-sitter\/"},{"key":"e_1_3_2_1_2_1","unstructured":"2024. https:\/\/chat.openai.com\/"},{"key":"e_1_3_2_1_3_1","unstructured":"2024. https:\/\/tree-sitter.github.io\/tree-sitter\/using-parsers#pattern-matching-with-queries"},{"key":"e_1_3_2_1_4_1","unstructured":"2024. https:\/\/github.com\/tree-sitter\/tree-sitter-c\/blob\/master\/test\/corpus\/statements.txt"},{"key":"e_1_3_2_1_5_1","unstructured":"2024. https:\/\/huggingface.co\/razent\/cotext-2-cc"},{"key":"e_1_3_2_1_6_1","unstructured":"2024. https:\/\/huggingface.co\/Salesforce\/codet5-base"},{"key":"e_1_3_2_1_7_1","unstructured":"2024. GitHub Copilot. https:\/\/github.com\/features\/copilot"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3290353"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1002\/widm.1424"},{"key":"e_1_3_2_1_10_1","volume-title":"Language Models are Few-Shot Learners. CoRR, abs\/2005.14165","author":"Brown Tom B.","year":"2020","unstructured":"Tom B. Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, Sandhini Agarwal, Ariel Herbert-Voss, Gretchen Krueger, Tom Henighan, Rewon Child, Aditya Ramesh, Daniel M. Ziegler, Jeffrey Wu, Clemens Winter, Christopher Hesse, Mark Chen, Eric Sigler, Mateusz Litwin, Scott Gray, Benjamin Chess, Jack Clark, Christopher Berner, Sam McCandlish, Alec Radford, Ilya Sutskever, and Dario Amodei. 2020. Language Models are Few-Shot Learners. CoRR, abs\/2005.14165 (2020), arXiv:2005.14165. arxiv:2005.14165"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549162"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2021.11.003"},{"key":"e_1_3_2_1_13_1","unstructured":"Arun Das and Paul Rad. 2020. Opportunities and Challenges in Explainable Artificial Intelligence (XAI): A Survey. arxiv:2006.11371."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597926.3598067"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597926.3598067"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3623343"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00128"},{"volume-title":"2024 IEEE\/ACM 46th International Conference on Software Engineering (ICSE). IEEE Computer Society","author":"Feng S.","key":"e_1_3_2_1_18_1","unstructured":"S. Feng, W. Suo, Y. Wu, D. Zou, Y. Liu, and H. Jin. 2024. Machine Learning is All You Need: A Simple Token-based Approach for Effective Code Clone Detection. In 2024 IEEE\/ACM 46th International Conference on Software Engineering (ICSE). IEEE Computer Society, Los Alamitos, CA, USA. 1006\u20131006. issn:1558-1225 https:\/\/doi.ieeecomputersociety.org\/"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Zhangyin Feng Daya Guo Duyu Tang Nan Duan Xiaocheng Feng Ming Gong Linjun Shou Bing Qin Ting Liu Daxin Jiang and Ming Zhou. 2020. CodeBERT: A Pre-Trained Model for Programming and Natural Languages. arxiv:2002.08155.","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"volume-title":"Program Restructuring as an Aid to Software Maintenance. Ph. D. Dissertation","author":"Griswold William G.","key":"e_1_3_2_1_20_1","unstructured":"William G. Griswold. 1991. Program Restructuring as an Aid to Software Maintenance. Ph. D. Dissertation. University of Washington."},{"key":"e_1_3_2_1_21_1","volume-title":"Wang","author":"Gu Alex","year":"2024","unstructured":"Alex Gu, Baptiste Rozi\u00e8re, Hugh Leather, Armando Solar-Lezama, Gabriel Synnaeve, and Sida I. Wang. 2024. CRUXEval: A Benchmark for Code Reasoning, Understanding and Execution. arxiv:2401.03065."},{"volume-title":"Proceedings of the 7th International Workshop on Automating Test Case Design, Selection, and Evaluation. 31\u201337","author":"Ren\u00e1ta","key":"e_1_3_2_1_22_1","unstructured":"Ren\u00e1ta Hodov\u00e1n and \u00c1kos Kiss. 2016. Modernizing hierarchical delta debugging. In Proceedings of the 7th International Workshop on Automating Test Case Design, Selection, and Evaluation. 31\u201337."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME.2017.26"},{"key":"e_1_3_2_1_24_1","volume-title":"Fine-tuned Language Models for Text Classification. CoRR, abs\/1801.06146","author":"Howard Jeremy","year":"2018","unstructured":"Jeremy Howard and Sebastian Ruder. 2018. Fine-tuned Language Models for Text Classification. CoRR, abs\/1801.06146 (2018), arXiv:1801.06146. arxiv:1801.06146"},{"key":"e_1_3_2_1_25_1","unstructured":"Jie Hu Qian Zhang and Heng Yin. 2023. Augmenting Greybox Fuzzing with Generative AI. arxiv:2306.06782."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597926.3598040"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ASE56229.2023.00181"},{"key":"e_1_3_2_1_28_1","unstructured":"Hamel Husain Ho-Hsiang Wu Tiferet Gazit Miltiadis Allamanis and Marc Brockschmidt. 2020. CodeSearchNet Challenge: Evaluating the State of Semantic Code Search. arxiv:1909.09436."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3611643.3613892"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.5555\/3524938.3525412"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/1985793.1985815"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/2393596.2393655"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2014.2318734"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3278186.3278189"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2019.00087"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00085"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549081"},{"key":"e_1_3_2_1_38_1","unstructured":"Zongjie Li Chaozheng Wang Pingchuan Ma Chaowei Liu Shuai Wang Daoyuan Wu Cuiyun Gao and Yang Liu. 2023. On Extracting Specialized Code Abilities from Large Language Models: A Feasibility Study. arxiv:2303.03012."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3560815"},{"key":"e_1_3_2_1_40_1","unstructured":"Yuhan Liu Saurabh Agarwal and Shivaram Venkataraman. 2021. AutoFreeze: Automatically Freezing Model Blocks to Accelerate Fine-tuning. arxiv:2102.01386."},{"key":"e_1_3_2_1_41_1","volume-title":"Shengyu Fu, and Shujie Liu.","author":"Lu Shuai","year":"2021","unstructured":"Shuai Lu, Daya Guo, Shuo Ren, Junjie Huang, Alexey Svyatkovskiy, Ambrosio Blanco, Colin Clement, Dawn Drain, Daxin Jiang, Duyu Tang, Ge Li, Lidong Zhou, Linjun Shou, Long Zhou, Michele Tufano, Ming Gong, Ming Zhou, Nan Duan, Neel Sundaresan, Shao Kun Deng, Shengyu Fu, and Shujie Liu. 2021. CodeXGLUE: A Machine Learning Benchmark Dataset for Code Understanding and Generation. arxiv:2102.04664."},{"key":"e_1_3_2_1_42_1","volume-title":"Massimiliano Di Penta, and Gabriele Bavota","author":"Mastropaolo Antonio","year":"2023","unstructured":"Antonio Mastropaolo, Matteo Ciniselli, Massimiliano Di Penta, and Gabriele Bavota. 2023. Evaluating Code Summarization Techniques: A New Metric and an Empirical Characterization. arxiv:2312.15475."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2015.2465386"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.14722\/ndss.2024.24556"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/TSE.2004.1265817"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3605943"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/1134285.1134307"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2009.5070529"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/1082983.1083143"},{"key":"e_1_3_2_1_50_1","unstructured":"Changan Niu Chuanyi Li Bin Luo and Vincent Ng. 2022. Deep Learning Meets Software Engineering: A Survey on Pre-Trained Models of Source Code. arxiv:2205.11739."},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00180"},{"volume-title":"Refactoring Object-Oriented Frameworks. Ph. D. Dissertation","author":"Opdyke William F.","key":"e_1_3_2_1_52_1","unstructured":"William F. Opdyke. 1992. Refactoring Object-Oriented Frameworks. Ph. D. Dissertation. University of Illinois, Urbana-Champaign. IL, USA. citeseer.ist.psu.edu\/opdyke92refactoring.html"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSR.2019.00078"},{"key":"e_1_3_2_1_55_1","unstructured":"Shuo Ren Daya Guo Shuai Lu Long Zhou Shujie Liu Duyu Tang Neel Sundaresan Ming Zhou Ambrosio Blanco and Shuai Ma. 2020. CodeBLEU: a Method for Automatic Evaluation of Code Synthesis. arxiv:2009.10297."},{"key":"e_1_3_2_1_56_1","volume-title":"International Conference on Software Engineering: New Ideas and Emerging Results.","author":"Rukmono Satrio","year":"2024","unstructured":"Satrio Rukmono, Lina Ochoa Venegas, and MRV Chaudron. 2024. Deductive Software Architecture Recovery via Chain-of-thought Prompting. In International Conference on Software Engineering: New Ideas and Emerging Results."},{"key":"e_1_3_2_1_57_1","volume-title":"Noshin Ulfat, Fahmid Al Rifat, and Vinicius Carvalho Lopes.","author":"Siddiq Mohammed Latif","year":"2024","unstructured":"Mohammed Latif Siddiq, Joanna C. S. Santos, Ridwanul Hasan Tanvir, Noshin Ulfat, Fahmid Al Rifat, and Vinicius Carvalho Lopes. 2024. Using Large Language Models to Generate JUnit Tests: An Empirical Study. arxiv:2305.00418."},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/APR59189.2023.00012"},{"key":"e_1_3_2_1_59_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic Sergey Edunov and Thomas Scialom. 2023. Llama 2: Open Foundation and Fine-Tuned Chat Models. arxiv:2307.09288."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340544"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510621"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE43902.2021.00027"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE.2012.6227190"},{"key":"e_1_3_2_1_64_1","unstructured":"Ashish Vaswani Noam Shazeer Niki Parmar Jakob Uszkoreit Llion Jones Aidan N. Gomez Lukasz Kaiser and Illia Polosukhin. 2023. Attention Is All You Need. arxiv:1706.03762."},{"key":"e_1_3_2_1_65_1","unstructured":"Giulia Vilone and Luca Longo. 2020. Explainable Artificial Intelligence: a Systematic Review. arxiv:2006.00093."},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3468264.3468625"},{"key":"e_1_3_2_1_67_1","volume-title":"Aakanksha Chowdhery, and Denny Zhou.","author":"Wang Xuezhi","year":"2023","unstructured":"Xuezhi Wang, Jason Wei, Dale Schuurmans, Quoc Le, Ed Chi, Sharan Narang, Aakanksha Chowdhery, and Denny Zhou. 2023. Self-Consistency Improves Chain of Thought Reasoning in Language Models. arxiv:2203.11171."},{"key":"e_1_3_2_1_68_1","volume-title":"Hoi","author":"Wang Yue","year":"2021","unstructured":"Yue Wang, Weishi Wang, Shafiq Joty, and Steven C. H. Hoi. 2021. CodeT5: Identifier-aware Unified Pre-trained Encoder-Decoder Models for Code Understanding and Generation. arxiv:2109.00859."},{"key":"e_1_3_2_1_69_1","volume-title":"Chi, Quoc Le, and Denny Zhou","author":"Wei Jason","year":"2023","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Brian Ichter, Fei Xia, Ed Chi, Quoc Le, and Denny Zhou. 2023. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. arxiv:2201.11903."},{"key":"e_1_3_2_1_70_1","volume-title":"Tiffany Bao, Ning Zhang, Ruoyu \"Fish\" Wang, and Chaowei Xiao.","author":"Wu Fangzhou","year":"2023","unstructured":"Fangzhou Wu, Qingzhao Zhang, Ati Priya Bajaj, Tiffany Bao, Ning Zhang, Ruoyu \"Fish\" Wang, and Chaowei Xiao. 2023. Exploring the Limits of ChatGPT in Software Security Applications. arxiv:2312.05275."},{"key":"e_1_3_2_1_71_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSE48619.2023.00129"},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3623326"},{"key":"e_1_3_2_1_73_1","doi-asserted-by":"publisher","DOI":"10.1145\/3597503.3623342"},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/587051.587053"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1109\/32.988498"},{"key":"e_1_3_2_1_76_1","unstructured":"Quanjun Zhang Tongke Zhang Juan Zhai Chunrong Fang Bowen Yu Weisong Sun and Zhenyu Chen. 2023. A Critical Review of Large Language Model on Software Engineering: An Example from ChatGPT and Automated Program Repair. arxiv:2310.08879."},{"volume-title":"Devign: effective vulnerability identification by learning comprehensive program semantics via graph neural networks","author":"Zhou Yaqin","key":"e_1_3_2_1_77_1","unstructured":"Yaqin Zhou, Shangqing Liu, Jingkai Siow, Xiaoning Du, and Yang Liu. 2019. Devign: effective vulnerability identification by learning comprehensive program semantics via graph neural networks. Curran Associates Inc., Red Hook, NY, USA."},{"key":"e_1_3_2_1_78_1","doi-asserted-by":"publisher","DOI":"10.1145\/3611643.3616354"}],"event":{"name":"ISSTA '24: 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis","sponsor":["SIGSOFT ACM Special Interest Group on Software Engineering","AITO"],"location":"Vienna Austria","acronym":"ISSTA '24"},"container-title":["Proceedings of the 33rd ACM SIGSOFT International Symposium on Software Testing and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3650212.3680399","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3650212.3680399","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:09Z","timestamp":1750294689000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3650212.3680399"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,11]]},"references-count":78,"alternative-id":["10.1145\/3650212.3680399","10.1145\/3650212"],"URL":"https:\/\/doi.org\/10.1145\/3650212.3680399","relation":{},"subject":[],"published":{"date-parts":[[2024,9,11]]},"assertion":[{"value":"2024-09-11","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}