{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:24:55Z","timestamp":1775229895419,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":56,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Natural Science Foundation of China","award":["61972328"],"award-info":[{"award-number":["61972328"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583421","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:25Z","timestamp":1682551825000},"page":"3299-3308","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":16,"title":["PROD: Progressive Distillation for Dense Retrieval"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9172-1628","authenticated-orcid":false,"given":"Zhenghao","family":"Lin","sequence":"first","affiliation":[{"name":"School of Informatics, Xiamen University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9954-9674","authenticated-orcid":false,"given":"Yeyun","family":"Gong","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8893-366X","authenticated-orcid":false,"given":"Xiao","family":"Liu","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9940-3517","authenticated-orcid":false,"given":"Hang","family":"Zhang","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2275-997X","authenticated-orcid":false,"given":"Chen","family":"Lin","sequence":"additional","affiliation":[{"name":"School of Informatics, Xiamen University, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8241-4746","authenticated-orcid":false,"given":"Anlei","family":"Dong","sequence":"additional","affiliation":[{"name":"Microsoft, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4779-9588","authenticated-orcid":false,"given":"Jian","family":"Jiao","sequence":"additional","affiliation":[{"name":"Microsoft, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8208-898X","authenticated-orcid":false,"given":"Jingwen","family":"Lu","sequence":"additional","affiliation":[{"name":"Microsoft, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6657-5806","authenticated-orcid":false,"given":"Daxin","family":"Jiang","sequence":"additional","affiliation":[{"name":"Microsoft, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2430-575X","authenticated-orcid":false,"given":"Rangan","family":"Majumder","sequence":"additional","affiliation":[{"name":"Microsoft, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3387-4674","authenticated-orcid":false,"given":"Nan","family":"Duan","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, China"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Robust Cross-Modal Representation Learning with Progressive Self-Distillation","author":"Andonian Alex","unstructured":"Alex Andonian, Shixing Chen, and Raffay Hamid. 2022. 
Robust Cross-Modal Representation Learning with Progressive Self-Distillation. In CVPR. IEEE, 16409\u201316420."},{"key":"e_1_3_2_1_2_1","volume-title":"Language models are few-shot learners. Advances in neural information processing systems 33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared\u00a0D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, 2020. Language models are few-shot learners. Advances in neural information processing systems 33 (2020), 1877\u20131901."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.52"},{"key":"e_1_3_2_1_4_1","unstructured":"Pengguang Chen, Shu Liu, Hengshuang Zhao, and Jiaya Jia. 2021. Distilling Knowledge via Knowledge Review. In CVPR. 5008\u20135017."},{"key":"e_1_3_2_1_5_1","volume-title":"Overview of the TREC 2019 deep learning track. CoRR abs\/2003.07820","author":"Craswell Nick","year":"2020","unstructured":"Nick Craswell, Bhaskar Mitra, Emine Yilmaz, Daniel Campos, and Ellen\u00a0M. Voorhees. 2020. Overview of the TREC 2019 deep learning track. CoRR abs\/2003.07820 (2020). arXiv:2003.07820 https:\/\/arxiv.org\/abs\/2003.07820"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Zhuyun Dai and Jamie Callan. 2019. Deeper Text Understanding for IR with Contextual Neural Language Modeling. In SIGIR. 985\u2013988.","DOI":"10.1145\/3331184.3331303"},{"key":"e_1_3_2_1_7_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_8_1","volume-title":"Proceedings of the 39th International Conference on Machine Learning, Kamalika Chaudhuri, Stefanie Jegelka, Le\u00a0Song, Csaba Szepesv\u00e1ri, Gang Niu, and Sivan Sabato (Eds.). Vol.\u00a0162","author":"Ethayarajh Kawin","year":"2022","unstructured":"Kawin Ethayarajh, Yejin Choi, and Swabha Swayamdipta. 2022. Understanding Dataset Difficulty with V-Usable Information. In Proceedings of the 39th International Conference on Machine Learning, Kamalika Chaudhuri, Stefanie Jegelka, Le\u00a0Song, Csaba Szepesv\u00e1ri, Gang Niu, and Sivan Sabato (Eds.). Vol.\u00a0162. 5988\u20136008. https:\/\/proceedings.mlr.press\/v162\/ethayarajh22a.html"},{"key":"e_1_3_2_1_9_1","volume-title":"SPLADE v2: Sparse Lexical and Expansion Model for Information Retrieval. CoRR abs\/2109.10086","author":"Formal Thibault","year":"2021","unstructured":"Thibault Formal, Carlos Lassance, Benjamin Piwowarski, and St\u00e9phane Clinchant. 2021. SPLADE v2: Sparse Lexical and Expansion Model for Information Retrieval. CoRR abs\/2109.10086 (2021). arXiv:2109.10086 https:\/\/arxiv.org\/abs\/2109.10086"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"crossref","unstructured":"Takashi Fukuda, Masayuki Suzuki, Gakuto Kurata, Samuel Thomas, Jia Cui, and Bhuvana Ramabhadran. 2017. Efficient Knowledge Distillation from an Ensemble of Teachers. In INTERSPEECH. ISCA, 3697\u20133701.","DOI":"10.21437\/Interspeech.2017-614"},{"key":"e_1_3_2_1_11_1","volume-title":"Distilling the Knowledge in a Neural Network. CoRR abs\/1503.02531","author":"Hinton Geoffrey\u00a0E.","year":"2015","unstructured":"Geoffrey\u00a0E. Hinton, Oriol Vinyals, and Jeffrey Dean. 2015. 
Distilling the Knowledge in a Neural Network. CoRR abs\/1503.02531 (2015)."},{"key":"e_1_3_2_1_12_1","volume-title":"Improving Efficient Neural Ranking Models with Cross-Architecture Knowledge Distillation. CoRR abs\/2010.02666","author":"Hofst\u00e4tter Sebastian","year":"2020","unstructured":"Sebastian Hofst\u00e4tter, Sophia Althammer, Michael Schr\u00f6der, Mete Sertkan, and Allan Hanbury. 2020. Improving Efficient Neural Ranking Models with Cross-Architecture Knowledge Distillation. CoRR abs\/2010.02666 (2020). arXiv:2010.02666 https:\/\/arxiv.org\/abs\/2010.02666"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","unstructured":"Sebastian Hofst\u00e4tter, Sheng-Chieh Lin, Jheng-Hong Yang, Jimmy Lin, and Allan Hanbury. 2021. Efficiently Teaching an Effective Dense Retriever with Balanced Topic Aware Sampling. In Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Fernando Diaz, Chirag Shah, Torsten Suel, Pablo Castells, Rosie Jones, and Tetsuya Sakai (Eds.). 113\u2013122. https:\/\/doi.org\/10.1145\/3404835.3462891","DOI":"10.1145\/3404835.3462891"},{"key":"e_1_3_2_1_14_1","volume-title":"ACL (1)","author":"Huang Shaoyi","unstructured":"Shaoyi Huang, Dongkuan Xu, Ian\u00a0En-Hsu Yen, Yijue Wang, Sung-En Chang, Bingbing Li, Shiyang Chen, Mimi Xie, Sanguthevar Rajasekaran, Hang Liu, and Caiwen Ding. 2022. Sparse Progressive Distillation: Resolving Overfitting under Pretrain-and-Finetune Paradigm. In ACL (1). Association for Computational Linguistics, 190\u2013200."},{"key":"e_1_3_2_1_15_1","volume-title":"Tinybert: Distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351","author":"Jiao Xiaoqi","year":"2019","unstructured":"Xiaoqi Jiao, Yichun Yin, Lifeng Shang, Xin Jiang, Xiao Chen, Linlin Li, Fang Wang, and Qun Liu. 2019. Tinybert: Distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351 (2019)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/TBDATA.2019.2921572"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401075"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Taehyeon Kim, Jaehoon Oh, Nakyil Kim, Sangwook Cho, and Se-Young Yun. 2021. Comparing Kullback-Leibler Divergence and Mean Squared Error Loss in Knowledge Distillation. In IJCAI. 2628\u20132635.","DOI":"10.24963\/ijcai.2021\/362"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00276"},{"key":"e_1_3_2_1_21_1","volume-title":"Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461","author":"Lewis Mike","year":"2019","unstructured":"Mike Lewis, Yinhan Liu, Naman Goyal, Marjan Ghazvininejad, Abdelrahman Mohamed, Omer Levy, Ves Stoyanov, and Luke Zettlemoyer. 2019. Bart: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. arXiv preprint arXiv:1910.13461 (2019)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2017.2773081"},{"key":"e_1_3_2_1_23_1","volume-title":"Distilling Dense Representations for Ranking using Tightly-Coupled Teachers. CoRR abs\/2010.11386","author":"Lin Sheng-Chieh","year":"2020","unstructured":"Sheng-Chieh Lin, Jheng-Hong Yang, and Jimmy Lin. 2020. Distilling Dense Representations for Ranking using Tightly-Coupled Teachers. 
CoRR abs\/2010.11386 (2020). arXiv:2010.11386 https:\/\/arxiv.org\/abs\/2010.11386"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.repl4nlp-1.17"},{"key":"e_1_3_2_1_25_1","volume-title":"Proceedings of the 7th International Conference on Learning Representations, ICLR 2019","author":"Loshchilov Ilya","year":"2019","unstructured":"Ilya Loshchilov and Frank Hutter. 2019. Decoupled Weight Decay Regularization. In Proceedings of the 7th International Conference on Learning Representations, ICLR 2019, New Orleans, LA, USA, May 6-9, 2019. OpenReview.net. https:\/\/openreview.net\/forum?id=Bkg6RiCqY7"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2205.09153"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.316"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Seyed-Iman Mirzadeh, Mehrdad Farajtabar, Ang Li, Nir Levine, Akihiro Matsukawa, and Hassan Ghasemzadeh. 2020. Improved Knowledge Distillation via Teacher Assistant. In AAAI. 5191\u20135198.","DOI":"10.1609\/aaai.v34i04.5963"},{"key":"e_1_3_2_1_29_1","volume-title":"Proceedings of the Workshop on Cognitive Computation: Integrating neural and symbolic approaches 2016 (CEUR Workshop Proceedings, Vol.\u00a01773)","author":"Nguyen Tri","year":"2016","unstructured":"Tri Nguyen, Mir Rosenberg, Xia Song, Jianfeng Gao, Saurabh Tiwary, Rangan Majumder, and Li Deng. 2016. MS MARCO: A Human Generated MAchine Reading COmprehension Dataset. In Proceedings of the Workshop on Cognitive Computation: Integrating neural and symbolic approaches 2016 (CEUR Workshop Proceedings, Vol.\u00a01773), Tarek\u00a0Richard Besold, Antoine Bordes, Artur\u00a0S. d\u2019Avila Garcez, and Greg Wayne (Eds.). http:\/\/ceur-ws.org\/Vol-1773\/CoCoNIPS_2016_paper9.pdf"},{"key":"e_1_3_2_1_30_1","volume-title":"Document Expansion by Query Prediction. CoRR abs\/1904.08375","author":"Nogueira Rodrigo\u00a0Frassetto","year":"2019","unstructured":"Rodrigo\u00a0Frassetto Nogueira, Wei Yang, Jimmy Lin, and Kyunghyun Cho. 2019. Document Expansion by Query Prediction. CoRR abs\/1904.08375 (2019)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_32_1","volume-title":"Prophetnet: Predicting future n-gram for sequence-to-sequence pre-training. arXiv preprint arXiv:2001.04063","author":"Qi Weizhen","year":"2020","unstructured":"Weizhen Qi, Yu Yan, Yeyun Gong, Dayiheng Liu, Nan Duan, Jiusheng Chen, Ruofei Zhang, and Ming Zhou. 2020. Prophetnet: Predicting future n-gram for sequence-to-sequence pre-training. arXiv preprint arXiv:2001.04063 (2020)."},{"key":"e_1_3_2_1_33_1","unstructured":"Yingqi Qu, Yuchen Ding, Jing Liu, Kai Liu, Ruiyang Ren, Wayne\u00a0Xin Zhao, Daxiang Dong, Hua Wu, and Haifeng Wang. 2021. RocketQA: An Optimized Training Approach to Dense Passage Retrieval for Open-Domain Question Answering. In NAACL-HLT. 5835\u20135847."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","unstructured":"Ruiyang Ren, Shangwen Lv, Yingqi Qu, Jing Liu, Wayne\u00a0Xin Zhao, Qiaoqiao She, Hua Wu, Haifeng Wang, and Ji-Rong Wen. 2021. PAIR: Leveraging Passage-Centric Similarity Relation for Improving Dense Passage Retrieval. In Findings of the Association for Computational Linguistics: ACL\/IJCNLP 2021 (Findings of ACL, Vol.\u00a0ACL\/IJCNLP 2021), Chengqing Zong, Fei Xia, Wenjie Li, and Roberto Navigli (Eds.). 2173\u20132183. 
https:\/\/doi.org\/10.18653\/v1\/2021.findings-acl.191","DOI":"10.18653\/v1\/2021.findings-acl.191"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.emnlp-main.224"},{"key":"e_1_3_2_1_36_1","volume-title":"Pro-KD: Progressive Distillation by Following the Footsteps of the Teacher. CoRR abs\/2110.08532","author":"Rezagholizadeh Mehdi","year":"2021","unstructured":"Mehdi Rezagholizadeh, Aref Jafari, Puneeth Salad, Pranav Sharma, Ali\u00a0Saheb Pasand, and Ali Ghodsi. 2021. Pro-KD: Progressive Distillation by Following the Footsteps of the Teacher. CoRR abs\/2110.08532 (2021)."},{"key":"e_1_3_2_1_37_1","volume-title":"FitNets: Hints for Thin Deep Nets","author":"Romero Adriana","year":"2015","unstructured":"Adriana Romero, Nicolas Ballas, Samira\u00a0Ebrahimi Kahou, Antoine Chassang, Carlo Gatta, and Yoshua Bengio. 2015. FitNets: Hints for Thin Deep Nets. In ICLR (Poster)."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.519"},{"key":"e_1_3_2_1_39_1","unstructured":"Tim Salimans and Jonathan Ho. 2022. Progressive Distillation for Fast Sampling of Diffusion Models. In ICLR. OpenReview.net."},{"key":"e_1_3_2_1_40_1","volume-title":"DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.naacl-main.272"},{"key":"e_1_3_2_1_42_1","volume-title":"Using deepspeed and megatron to train megatron-turing nlg 530b, a large-scale generative language model. arXiv preprint arXiv:2201.11990","author":"Smith Shaden","year":"2022","unstructured":"Shaden Smith, Mostofa Patwary, Brandon Norick, Patrick LeGresley, Samyam Rajbhandari, Jared Casper, Zhun Liu, Shrimai Prabhumoye, George Zerveas, Vijay Korthikanti, 2022. Using deepspeed and megatron to train megatron-turing nlg 530b, a large-scale generative language model. arXiv preprint arXiv:2201.11990 (2022)."},{"key":"e_1_3_2_1_43_1","volume-title":"ERNIE-Tiny: A Progressive Distillation Framework for Pretrained Transformer Compression. CoRR abs\/2106.02241","author":"Su Weiyue","year":"2021","unstructured":"Weiyue Su, Xuyi Chen, Shikun Feng, Jiaxiang Liu, Weixin Liu, Yu Sun, Hao Tian, Hua Wu, and Haifeng Wang. 2021. ERNIE-Tiny: A Progressive Distillation Framework for Pretrained Transformer Compression. CoRR abs\/2106.02241 (2021)."},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.746"},{"key":"e_1_3_2_1_45_1","volume-title":"Unified and Effective Ensemble Knowledge Distillation. CoRR abs\/2204.00548","author":"Wu Chuhan","year":"2022","unstructured":"Chuhan Wu, Fangzhao Wu, Tao Qi, and Yongfeng Huang. 2022. Unified and Effective Ensemble Knowledge Distillation. CoRR abs\/2204.00548 (2022)."},{"key":"e_1_3_2_1_46_1","volume-title":"Proceedings of the 9th International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=zeFrfgyZln","author":"Xiong Lee","year":"2021","unstructured":"Lee Xiong, Chenyan Xiong, Ye Li, Kwok-Fung Tang, Jialin Liu, Paul\u00a0N. Bennett, Junaid Ahmed, and Arnold Overwijk. 2021. Approximate Nearest Neighbor Negative Contrastive Learning for Dense Text Retrieval. In Proceedings of the 9th International Conference on Learning Representations. 
https:\/\/openreview.net\/forum?id=zeFrfgyZln"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.1145\/3077136.3080721"},{"key":"e_1_3_2_1_48_1","volume-title":"Is Retriever Merely an Approximator of Reader? CoRR abs\/2010.10999","author":"Yang Sohee","year":"2020","unstructured":"Sohee Yang and Minjoon Seo. 2020. Is Retriever Merely an Approximator of Reader? CoRR abs\/2010.10999 (2020). arXiv:2010.10999 https:\/\/arxiv.org\/abs\/2010.10999"},{"key":"e_1_3_2_1_49_1","unstructured":"Sergey Zagoruyko and Nikos Komodakis. 2017. Paying More Attention to Attention: Improving the Performance of Convolutional Neural Networks via Attention Transfer. In ICLR (Poster)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Hansi Zeng, Hamed Zamani, and Vishwa Vinay. 2022. Curriculum Learning for Dense Retrieval Distillation. In SIGIR. 1979\u20131983.","DOI":"10.1145\/3477495.3531791"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","unstructured":"Jingtao Zhan, Jiaxin Mao, Yiqun Liu, Jiafeng Guo, Min Zhang, and Shaoping Ma. 2021. Optimizing Dense Retrieval Model Training with Hard Negatives. In Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, Fernando Diaz, Chirag Shah, Torsten Suel, Pablo Castells, Rosie Jones, and Tetsuya Sakai (Eds.). 1503\u20131512. https:\/\/doi.org\/10.1145\/3404835.3462880","DOI":"10.1145\/3404835.3462880"},{"key":"e_1_3_2_1_52_1","volume-title":"Proceedings of the 10th International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=MR7XubKUFB","author":"Zhang Hang","year":"2022","unstructured":"Hang Zhang, Yeyun Gong, Yelong Shen, Jiancheng Lv, Nan Duan, and Weizhu Chen. 2022. Adversarial Retriever-Ranker for Dense Text Retrieval. In Proceedings of the 10th International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=MR7XubKUFB"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.3788\/LOP202259.1122003"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_55_1","unstructured":"Chunting Zhou, Jiatao Gu, and Graham Neubig. 2020. Understanding Knowledge Distillation in Non-autoregressive Machine Translation. In ICLR."},{"key":"e_1_3_2_1_56_1","unstructured":"Wangchunshu Zhou, Canwen Xu, and Julian\u00a0J. McAuley. 2022. BERT Learns to Teach: Knowledge Distillation with Meta Learning. In ACL (1). 
7037\u20137049."}],"event":{"name":"WWW '23: The ACM Web Conference 2023","location":"Austin TX USA","acronym":"WWW '23","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583421","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583421","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T16:47:52Z","timestamp":1750178872000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583421"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":56,"alternative-id":["10.1145\/3543507.3583421","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583421","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}
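The record above is a work object from Crossref's public REST API (the `{"status":"ok","message-type":"work",...}` envelope, with the record itself under `message`). For orientation, a minimal sketch of how such a record can be retrieved and the fields shown above read back out; it assumes Python with the third-party `requests` package installed, and the field names follow the record above rather than any guaranteed schema.

```python
# Minimal sketch: fetch the Crossref work record shown above and read its main fields.
# Assumes the `requests` package; https://api.crossref.org/works/{doi} is Crossref's public endpoint.
import requests

DOI = "10.1145/3543507.3583421"  # PROD: Progressive Distillation for Dense Retrieval
resp = requests.get(f"https://api.crossref.org/works/{DOI}", timeout=30)
resp.raise_for_status()
work = resp.json()["message"]  # the payload sits inside the {"status", "message", ...} envelope

# "title" and "container-title" are lists; "author" entries carry "given"/"family" parts.
title = work["title"][0]
authors = [f'{a.get("given", "")} {a.get("family", "")}'.strip() for a in work["author"]]
venue = work["container-title"][0]
print(title)
print(", ".join(authors))
print(venue, "-", work["DOI"])

# Deposited references live under "reference"; an entry may carry a DOI,
# an unstructured citation string, or both, as in the array above.
for ref in work.get("reference", [])[:5]:
    print(ref.get("DOI") or ref.get("unstructured", "")[:80])
```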