{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T02:19:55Z","timestamp":1776824395273,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2022,10,10]],"date-time":"2022-10-10T00:00:00Z","timestamp":1665360000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Singapore Data Science Consortium","award":["SDSC-2020-001"],"award-info":[{"award-number":["SDSC-2020-001"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2022,10,10]]},"DOI":"10.1145\/3551349.3556964","type":"proceedings-article","created":{"date-parts":[[2023,1,5]],"date-time":"2023-01-05T20:43:54Z","timestamp":1672951434000},"page":"1-12","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":38,"title":["Compressing Pre-trained Models of Code into 3 MB"],"prefix":"10.1145","author":[{"given":"Jieke","family":"Shi","sequence":"first","affiliation":[{"name":"Singapore Management University, Singapore"}]},{"given":"Zhou","family":"Yang","sequence":"additional","affiliation":[{"name":"Singapore Management University, Singapore"}]},{"given":"Bowen","family":"Xu","sequence":"additional","affiliation":[{"name":"Singapore Management University, Singapore"}]},{"given":"Hong Jin","family":"Kang","sequence":"additional","affiliation":[{"name":"Singapore Management University, Singapore"}]},{"given":"David","family":"Lo","sequence":"additional","affiliation":[{"name":"Singapore Management University, Singapore"}]}],"member":"320","published-online":{"date-parts":[[2023,1,5]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Proceedings of the 2021 Conference of the North American","author":"Ahmad Wasi","unstructured":"Wasi Ahmad, Saikat Chakraborty, Baishakhi Ray, and Kai-Wei Chang. 2021. Unified Pre-training for Program Understanding and Generation. In Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies. Association for Computational Linguistics, Online, 2655\u20132668."},{"key":"e_1_3_2_1_2_1","unstructured":"Gareth\u00a0Ari Aye and Gail\u00a0E. Kaiser. 2020. Sequence Model Design for Code Completion in the Modern IDE. (2020). arXiv:2004.05249"},{"key":"e_1_3_2_1_3_1","volume-title":"Proceedings of the 27th International Conference on Neural Information Processing Systems-Volume 2. 2654\u20132662","author":"Ba Lei\u00a0Jimmy","year":"2014","unstructured":"Lei\u00a0Jimmy Ba and Rich Caruana. 2014. Do deep nets really need to be deep?. In Proceedings of the 27th International Conference on Neural Information Processing Systems-Volume 2. 2654\u20132662."},{"key":"e_1_3_2_1_4_1","unstructured":"Luca Buratti Saurabh Pujar Mihaela\u00a0A. Bornea J.\u00a0Scott McCarley Yunhui Zheng Gaetano Rossiello Alessandro Morari Jim Laredo Veronika Thost Yufan Zhuang and Giacomo Domeniconi. 2020. Exploring Software Naturalness through Neural Language Models. (2020). arxiv:2006.12641"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/341"},{"key":"e_1_3_2_1_6_1","volume-title":"On the Efficacy of Knowledge Distillation. In 2019 IEEE\/CVF International Conference on Computer Vision, ICCV","author":"Cho Jang\u00a0Hyun","year":"2019","unstructured":"Jang\u00a0Hyun Cho and Bharath Hariharan. 2019. On the Efficacy of Knowledge Distillation. In 2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019. IEEE, 4793\u20134801."},{"key":"e_1_3_2_1_7_1","volume-title":"2020 8th International Conference on Learning Representations.","author":"Clark Kevin","year":"2020","unstructured":"Kevin Clark, Minh-Thang Luong, Quoc\u00a0V. Le, and Christopher\u00a0D. Manning. 2020. ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators. In 2020 8th International Conference on Learning Representations."},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT","volume":"1","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, NAACL-HLT 2019, Volume 1 (Long and Short Papers). Association for Computational Linguistics, 4171\u20134186."},{"key":"e_1_3_2_1_9_1","volume-title":"Reducing Transformer Depth on Demand with Structured Dropout. In 2020 8th International Conference on Learning Representations.","author":"Fan Angela","year":"2020","unstructured":"Angela Fan, Edouard Grave, and Armand Joulin. 2020. Reducing Transformer Depth on Demand with Structured Dropout. In 2020 8th International Conference on Learning Representations."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.139"},{"key":"e_1_3_2_1_11_1","volume-title":"Compressing Large-Scale Transformer-Based Models: A Case Study on BERT. Transactions of the Association for Computational Linguistics 9 (09","author":"Ganesh Prakhar","year":"2021","unstructured":"Prakhar Ganesh, Yao Chen, Xin Lou, Mohammad\u00a0Ali Khan, Yin Yang, Hassan Sajjad, Preslav Nakov, Deming Chen, and Marianne Winslett. 2021. Compressing Large-Scale Transformer-Based Models: A Case Study on BERT. Transactions of the Association for Computational Linguistics 9 (09 2021), 1061\u20131080."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.10.113"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.repl4nlp-1.18"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01453-z"},{"key":"e_1_3_2_1_15_1","volume-title":"GraphCodeBERT: Pre-training Code Representations with Data Flow. In 2021 9th International Conference on Learning Representations.","author":"Guo Daya","year":"2021","unstructured":"Daya Guo, Shuo Ren, Shuai Lu, Zhangyin Feng, Duyu Tang, Shujie LIU, Long Zhou, Nan Duan, Alexey Svyatkovskiy, Shengyu Fu, Michele Tufano, Shao\u00a0Kun Deng, Colin Clement, Dawn Drain, Neel Sundaresan, Jian Yin, Daxin Jiang, and Ming Zhou. 2021. GraphCodeBERT: Pre-training Code Representations with Data Flow. In 2021 9th International Conference on Learning Representations."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1145\/3524610.3527897"},{"key":"e_1_3_2_1_17_1","volume-title":"2015 NIPS Deep Learning and Representation Learning Workshop.","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeffrey Dean. 2015. Distilling the Knowledge in a Neural Network. In 2015 NIPS Deep Learning and Representation Learning Workshop."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3540250.3549141"},{"key":"e_1_3_2_1_19_1","unstructured":"Hamel Husain Ho-Hsiang Wu Tiferet Gazit Miltiadis Allamanis and Marc Brockschmidt. 2019. CodeSearchNet Challenge: Evaluating the State of Semantic Code Search. (2019). arXiv:1909.09436"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.5555\/3524938.3525412"},{"key":"e_1_3_2_1_22_1","volume-title":"Proceedings of the 37th International Conference on Machine Learning, ICML 2020","author":"Kanade Aditya","year":"2020","unstructured":"Aditya Kanade, Petros Maniatis, Gogul Balakrishnan, and Kensen Shi. 2020. Learning and Evaluating Contextual Embedding of Source Code. In Proceedings of the 37th International Conference on Machine Learning, ICML 2020, 13-18 July 2020, Virtual Event(Proceedings of Machine Learning Research, Vol.\u00a0119). PMLR, 5110\u20135121."},{"key":"e_1_3_2_1_23_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a0139)","author":"Kim Sehoon","year":"2021","unstructured":"Sehoon Kim, Amir Gholami, Zhewei Yao, Michael\u00a0W. Mahoney, and Kurt Keutzer. 2021. I-BERT: Integer-only BERT Quantization. In Proceedings of the 38th International Conference on Machine Learning(Proceedings of Machine Learning Research, Vol.\u00a0139). PMLR, 5506\u20135518."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.537"},{"key":"e_1_3_2_1_26_1","volume-title":"Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692(2019).","author":"Liu Yinhan","year":"2019","unstructured":"Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, and Veselin Stoyanov. 2019. Roberta: A robustly optimized bert pretraining approach. arXiv preprint arXiv:1907.11692(2019)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240567"},{"key":"e_1_3_2_1_28_1","volume-title":"CodeXGLUE: A Machine Learning Benchmark Dataset for Code Understanding and Generation. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1).","author":"Lu Shuai","year":"2021","unstructured":"Shuai Lu, Daya Guo, Shuo Ren, Junjie Huang, Alexey Svyatkovskiy, Ambrosio Blanco, Colin Clement, Dawn Drain, Daxin Jiang, Duyu Tang, 2021. CodeXGLUE: A Machine Learning Benchmark Dataset for Code Understanding and Generation. In Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 1)."},{"key":"e_1_3_2_1_29_1","volume-title":"Advances in Neural Information Processing Systems, Vol.\u00a032. Curran Associates","author":"Michel Paul","unstructured":"Paul Michel, Omer Levy, and Graham Neubig. 2019. Are Sixteen Heads Really Better than One?. In Advances in Neural Information Processing Systems, Vol.\u00a032. Curran Associates, Inc."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5963"},{"key":"e_1_3_2_1_31_1","volume-title":"Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC\u201912)","author":"Petrov Slav","year":"2012","unstructured":"Slav Petrov, Dipanjan Das, and Ryan McDonald. 2012. A Universal Part-of-Speech Tagset. In Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC\u201912). 2089\u20132096."},{"key":"e_1_3_2_1_32_1","volume-title":"Language models are unsupervised multitask learners. OpenAI blog 1, 8","author":"Radford Alec","year":"2019","unstructured":"Alec Radford, Jeffrey Wu, Rewon Child, David Luan, Dario Amodei, Ilya Sutskever, 2019. Language models are unsupervised multitask learners. OpenAI blog 1, 8 (2019), 9."},{"key":"e_1_3_2_1_33_1","first-page":"1","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter\u00a0J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research 21, 140 (2020), 1\u201367.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_34_1","unstructured":"Victor Sanh Lysandre Debut Julien Chaumond and Thomas Wolf. 2019. DistilBERT a distilled version of BERT: smaller faster cheaper and lighter. (2019). arXiv:1910.01108"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER53432.2022.00130"},{"key":"e_1_3_2_1_36_1","volume-title":"Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)","author":"Sun Siqi","unstructured":"Siqi Sun, Yu Cheng, Zhe Gan, and Jingjing Liu. 2019. Patient Knowledge Distillation for BERT Model Compression. In Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP). Association for Computational Linguistics, Hong Kong, China, 4323\u20134332."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.195"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICSME.2014.77"},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/3368089.3417058"},{"key":"e_1_3_2_1_40_1","volume-title":"Fast and Memory-Efficient Neural Code Completion. In 2021 IEEE\/ACM 18th International Conference on Mining Software Repositories (MSR). 329\u2013340","author":"Svyatkovskiy Alexey","year":"2021","unstructured":"Alexey Svyatkovskiy, Sebastian Lee, Anna Hadjitofi, Maik Riechert, Juliana\u00a0Vicente Franco, and Miltiadis Allamanis. 2021. Fast and Memory-Efficient Neural Code Completion. In 2021 IEEE\/ACM 18th International Conference on Mining Software Repositories (MSR). 329\u2013340."},{"key":"e_1_3_2_1_41_1","unstructured":"Raphael Tang Yao Lu Linqing Liu Lili Mou Olga Vechtomova and Jimmy Lin. 2019. Distilling Task-Specific Knowledge from BERT into Simple Neural Networks. (2019). arXiv:1903.12136"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.5555\/211359"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.685"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.633"},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3447548.3467262"},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1109\/SANER53432.2022.00054"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3510003.3510146"},{"key":"e_1_3_2_1_48_1","volume-title":"IncBL: Incremental Bug Localization. In 2021 36th IEEE\/ACM International Conference on Automated Software Engineering (ASE). 1223\u20131226","author":"Yang Zhou","year":"2021","unstructured":"Zhou Yang, Jieke Shi, Shaowei Wang, and David Lo. 2021. IncBL: Incremental Bug Localization. In 2021 36th IEEE\/ACM International Conference on Automated Software Engineering (ASE). 1223\u20131226."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3071662"},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00071"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/EMC2-NIPS53020.2019.00016"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.37"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01165"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.eacl-main.238"},{"key":"e_1_3_2_1_55_1","volume-title":"Proceedings of the 33rd International Conference on Neural Information Processing Systems. 10197\u201310207","author":"Zhou Yaqin","year":"2019","unstructured":"Yaqin Zhou, Shangqing Liu, Jingkai Siow, Xiaoning Du, and Yang Liu. 2019. Devign: effective vulnerability identification by learning comprehensive program semantics via graph neural networks. In Proceedings of the 33rd International Conference on Neural Information Processing Systems. 10197\u201310207."}],"event":{"name":"ASE '22: 37th IEEE\/ACM International Conference on Automated Software Engineering","location":"Rochester MI USA","acronym":"ASE '22"},"container-title":["Proceedings of the 37th IEEE\/ACM International Conference on Automated Software Engineering"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3551349.3556964","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3551349.3556964","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T07:54:42Z","timestamp":1755849282000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3551349.3556964"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,10,10]]},"references-count":55,"alternative-id":["10.1145\/3551349.3556964","10.1145\/3551349"],"URL":"https:\/\/doi.org\/10.1145\/3551349.3556964","relation":{},"subject":[],"published":{"date-parts":[[2022,10,10]]},"assertion":[{"value":"2023-01-05","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}