{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T05:16:17Z","timestamp":1755839777771,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":59,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,21]],"date-time":"2024-10-21T00:00:00Z","timestamp":1729468800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,21]]},"DOI":"10.1145\/3627673.3680049","type":"proceedings-article","created":{"date-parts":[[2024,10,20]],"date-time":"2024-10-20T19:34:11Z","timestamp":1729452851000},"page":"4786-4794","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Towards Better Seach Query Classification with Distribution-Diverse Multi-Expert Knowledge Distillation in JD Ads Search"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-8053-4310","authenticated-orcid":false,"given":"Kun-Peng","family":"Ning","sequence":"first","affiliation":[{"name":"Peking University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0454-0808","authenticated-orcid":false,"given":"Ming","family":"Pang","sequence":"additional","affiliation":[{"name":"Business Growth BU, JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6691-0312","authenticated-orcid":false,"given":"Zheng","family":"Fang","sequence":"additional","affiliation":[{"name":"Business Growth BU, JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6944-9031","authenticated-orcid":false,"given":"Xue","family":"Jiang","sequence":"additional","affiliation":[{"name":"Business Growth BU, JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9382-6041","authenticated-orcid":false,"given":"Xi-Wei","family":"Zhao","sequence":"additional","affiliation":[{"name":"Business Growth BU, JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-2561-1919","authenticated-orcid":false,"given":"Chang-Ping","family":"Peng","sequence":"additional","affiliation":[{"name":"Business Growth BU, JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1379-5044","authenticated-orcid":false,"given":"Zhan-Gang","family":"Lin","sequence":"additional","affiliation":[{"name":"Business Growth BU, JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-1546-5807","authenticated-orcid":false,"given":"Jing-He","family":"Hu","sequence":"additional","affiliation":[{"name":"Business Growth BU, JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8555-2020","authenticated-orcid":false,"given":"Jing-Ping","family":"Shao","sequence":"additional","affiliation":[{"name":"Business Growth BU, JD.COM, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2120-5588","authenticated-orcid":false,"given":"Li","family":"Yuan","sequence":"additional","affiliation":[{"name":"Peking University, ShenZhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,21]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401184"},{"key":"e_1_3_2_1_2_1","volume-title":"Towards understanding ensemble, knowledge distillation and self-distillation in deep learning. arXiv preprint arXiv:2012.09816","author":"Allen-Zhu Zeyuan","year":"2020","unstructured":"Zeyuan Allen-Zhu and Yuanzhi Li. 2020. Towards understanding ensemble, knowledge distillation and self-distillation in deep learning. arXiv preprint arXiv:2012.09816 (2020)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDE51399.2021.00308"},{"key":"e_1_3_2_1_4_1","volume-title":"Do deep nets really need to be deep? Advances in neural information processing systems","author":"Ba Jimmy","year":"2014","unstructured":"Jimmy Ba and Rich Caruana. 2014. Do deep nets really need to be deep? Advances in neural information processing systems, Vol. 27 (2014)."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/2207676.2207710"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/3-540-46154-X_5"},{"key":"e_1_3_2_1_7_1","volume-title":"A systematic study of the class imbalance problem in convolutional neural networks. Neural networks","author":"Buda Mateusz","year":"2018","unstructured":"Mateusz Buda, Atsuto Maki, and Maciej A Mazurowski. 2018. A systematic study of the class imbalance problem in convolutional neural networks. Neural networks, Vol. 106 (2018), 249--259."},{"key":"e_1_3_2_1_8_1","volume-title":"International conference on machine learning. PMLR, 872--881","author":"Byrd Jonathon","year":"2019","unstructured":"Jonathon Byrd and Zachary Lipton. 2019. What is the effect of importance weighting in deep learning?. In International conference on machine learning. PMLR, 872--881."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3358170"},{"volume-title":"Convolutional neural network for sentence classification. Master's thesis","author":"Chen Yahui","key":"e_1_3_2_1_10_1","unstructured":"Yahui Chen. 2015. Convolutional neural network for sentence classification. Master's thesis. University of Waterloo."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01294"},{"key":"e_1_3_2_1_12_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_1_13_1","volume-title":"Product classification in e-commerce using distributional semantics. arXiv preprint arXiv:1606.06083","author":"Gupta Vivek","year":"2016","unstructured":"Vivek Gupta, Harish Karnick, Ashendra Bansal, and Pradhuman Jhala. 2016. Product classification in e-commerce using distributional semantics. arXiv preprint arXiv:1606.06083 (2016)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441717"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2008.239"},{"key":"e_1_3_2_1_16_1","volume-title":"Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeff Dean. 2015. Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.580"},{"key":"e_1_3_2_1_18_1","volume-title":"The class imbalance problem: A systematic study. Intelligent data analysis","author":"Japkowicz Nathalie","year":"2002","unstructured":"Nathalie Japkowicz and Shaju Stephen. 2002. The class imbalance problem: A systematic study. Intelligent data analysis, Vol. 6, 5 (2002), 429--449."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-2902"},{"key":"e_1_3_2_1_20_1","volume-title":"Short Text Pre-training with Extended Token Classification for E-commerce Query Understanding. arXiv preprint arXiv:2210.03915","author":"Jiang Haoming","year":"2022","unstructured":"Haoming Jiang, Tianyu Cao, Zheng Li, Chen Luo, Xianfeng Tang, Qingyu Yin, Danqing Zhang, Rahul Goutam, and Bing Yin. 2022. Short Text Pre-training with Extended Token Classification for E-commerce Query Understanding. arXiv preprint arXiv:2210.03915 (2022)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM50108.2020.00030"},{"key":"e_1_3_2_1_22_1","volume-title":"Tinybert: Distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351","author":"Jiao Xiaoqi","year":"2019","unstructured":"Xiaoqi Jiao, Yichun Yin, Lifeng Shang, Xin Jiang, Xiao Chen, Linlin Li, Fang Wang, and Qun Liu. 2019. Tinybert: Distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351 (2019)."},{"key":"e_1_3_2_1_23_1","volume-title":"Bag of tricks for efficient text classification. arXiv preprint arXiv:1607.01759","author":"Joulin Armand","year":"2016","unstructured":"Armand Joulin, Edouard Grave, Piotr Bojanowski, and Tomas Mikolov. 2016. Bag of tricks for efficient text classification. arXiv preprint arXiv:1607.01759 (2016)."},{"key":"e_1_3_2_1_24_1","unstructured":"Heran Lin Pengcheng Xiong Danqing Zhang Fan Yang Ryoichi Kato Mukul Kumar William Headden and Bing Yin. 2020. Light feed-forward networks for shard selection in large-scale product search. (2020)."},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/BigData.2018.8622008"},{"key":"e_1_3_2_1_26_1","volume-title":"Rethinking Position Bias Modeling with Knowledge Distillation for CTR Prediction. arXiv preprint arXiv:2204.00270","author":"Liu Congcong","year":"2022","unstructured":"Congcong Liu, Yuejiang Li, Jian Zhu, Xiwei Zhao, Changping Peng, Zhangang Lin, and Jingping Shao. 2022. Rethinking Position Bias Modeling with Knowledge Distillation for CTR Prediction. arXiv preprint arXiv:2204.00270 (2022)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539200"},{"key":"e_1_3_2_1_28_1","volume-title":"Learning robust models for e-commerce product search. arXiv preprint arXiv:2005.03624","author":"Nguyen Thanh V","year":"2020","unstructured":"Thanh V Nguyen, Nikhil Rao, and Karthik Subbian. 2020. Learning robust models for e-commerce product search. arXiv preprint arXiv:2005.03624 (2020)."},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3292500.3330759"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/SP.2016.41"},{"key":"e_1_3_2_1_31_1","volume-title":"The EBI search engine: EBI search as a service-making biological data accessible for all. Nucleic acids research","author":"Park Young M","year":"2017","unstructured":"Young M Park, Silvano Squizzato, Nicola Buso, Tamer Gur, and Rodrigo Lopez. 2017. The EBI search engine: EBI search as a service-making biological data accessible for all. Nucleic acids research, Vol. 45, W1 (2017), W545--W549."},{"key":"e_1_3_2_1_32_1","volume-title":"International Conference on Machine Learning. PMLR, 5142--5151","author":"Phuong Mary","year":"2019","unstructured":"Mary Phuong and Christoph Lampert. 2019. Towards understanding knowledge distillation. In International Conference on Machine Learning. PMLR, 5142--5151."},{"key":"e_1_3_2_1_33_1","unstructured":"Alec Radford Karthik Narasimhan Tim Salimans Ilya Sutskever et al. 2018. Improving language understanding by generative pre-training. (2018)."},{"key":"e_1_3_2_1_34_1","unstructured":"Alec Radford Jeffrey Wu Rewon Child David Luan Dario Amodei Ilya Sutskever et al. 2019. Language models are unsupervised multitask learners. OpenAI blog Vol. 1 8 (2019) 9."},{"key":"e_1_3_2_1_35_1","volume-title":"Artificial Intelligence and Soft Computing: 11th International Conference, ICAISC 2012, Zakopane, Poland, April 29-May 3, 2012, Proceedings, Part I 11","author":"Najgebauer Patryk","year":"2012","unstructured":"Patryk Najgebauer, Tomasz Nowak, Jakub Romanowski, Marcin Gabryel. 2012. Properties and structure of fast text search engine in context of semantic image analysis. In Artificial Intelligence and Soft Computing: 11th International Conference, ICAISC 2012, Zakopane, Poland, April 29-May 3, 2012, Proceedings, Part I 11. Springer, 592--599."},{"key":"e_1_3_2_1_36_1","volume-title":"a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108","author":"Sanh Victor","year":"2019","unstructured":"Victor Sanh, Lysandre Debut, Julien Chaumond, and Thomas Wolf. 2019. DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter. arXiv preprint arXiv:1910.01108 (2019)."},{"volume-title":"Data Science","author":"Shah Nemil","key":"e_1_3_2_1_37_1","unstructured":"Nemil Shah, Yash Goda, Naitik Rathod, Vatsal Khandor, Pankaj Kulkarni, and Ramchandra Mangrulkar. 2022. BERT-and FastText-Based Research Paper Recommender System. In Data Science. Chapman and Hall\/CRC, 169--183."},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/1645953.1646047"},{"key":"e_1_3_2_1_39_1","volume-title":"Proceedings, Part VII 14","author":"Shen Li","year":"2016","unstructured":"Li Shen, Zhouchen Lin, and Qingming Huang. 2016. Relay backpropagation for effective learning of deep convolutional neural networks. In Computer Vision--ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11--14, 2016, Proceedings, Part VII 14. Springer, 467--482."},{"key":"e_1_3_2_1_40_1","volume-title":"Is label smoothing truly incompatible with knowledge distillation: An empirical study. arXiv preprint arXiv:2104.00676","author":"Shen Zhiqiang","year":"2021","unstructured":"Zhiqiang Shen, Zechun Liu, Dejia Xu, Zitian Chen, Kwang-Ting Cheng, and Marios Savvides. 2021. Is label smoothing truly incompatible with knowledge distillation: An empirical study. arXiv preprint arXiv:2104.00676 (2021)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210152"},{"key":"e_1_3_2_1_42_1","volume-title":"Nicola Buso, Tamer Gur, Andrew Cowley, Weizhong Li, Mahmut Uludag, Sangya Pundir, Jennifer A Cham, Hamish McWilliam, et al.","author":"Squizzato Silvano","year":"2015","unstructured":"Silvano Squizzato, Young Mi Park, Nicola Buso, Tamer Gur, Andrew Cowley, Weizhong Li, Mahmut Uludag, Sangya Pundir, Jennifer A Cham, Hamish McWilliam, et al. 2015. The EBI Search engine: providing search and retrieval functionality for biological data from EMBL-EBI. Nucleic acids research, Vol. 43, W1 (2015), W585--W588."},{"key":"e_1_3_2_1_43_1","first-page":"6906","article-title":"Does knowledge distillation really work","volume":"34","author":"Stanton Samuel","year":"2021","unstructured":"Samuel Stanton, Pavel Izmailov, Polina Kirichenko, Alexander A Alemi, and Andrew G Wilson. 2021. Does knowledge distillation really work? Advances in Neural Information Processing Systems, Vol. 34 (2021), 6906--6919.","journal-title":"Advances in Neural Information Processing Systems"},{"volume-title":"Kendall's advanced theory of statistics, distribution theory","author":"Stuart Alan","key":"e_1_3_2_1_44_1","unstructured":"Alan Stuart and Keith Ord. 2010. Kendall's advanced theory of statistics, distribution theory. Vol. 1. John Wiley & Sons."},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the 12th Language Resources and Evaluation Conference. 3520--3527","author":"Tambi Ritiz","year":"2020","unstructured":"Ritiz Tambi, Ajinkya Kale, and Tracy Holloway King. 2020. Search query language identification using weak labeling. In Proceedings of the 12th Language Resources and Evaluation Conference. 3520--3527."},{"key":"e_1_3_2_1_46_1","volume-title":"H Chi, and Sagar Jain","author":"Tang Jiaxi","year":"2020","unstructured":"Jiaxi Tang, Rakesh Shivanna, Zhe Zhao, Dong Lin, Anima Singh, Ed H Chi, and Sagar Jain. 2020. Understanding and improving knowledge distillation. arXiv preprint arXiv:2002.03532 (2020)."},{"key":"e_1_3_2_1_47_1","volume-title":"NIPS workshop on Graph Representation Learning.","author":"Tayal Kshitij","year":"2019","unstructured":"Kshitij Tayal, Rao Nikhil, Saurabh Agarwal, and Karthik Subbian. 2019. Short text classification using graph convolutional network. In NIPS workshop on Graph Representation Learning."},{"key":"e_1_3_2_1_48_1","volume-title":"International conference on machine learning. PMLR, 10347--10357","author":"Touvron Hugo","year":"2021","unstructured":"Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, and Herv\u00e9 J\u00e9gou. 2021. Training data-efficient image transformers & distillation through attention. In International conference on machine learning. PMLR, 10347--10357."},{"key":"e_1_3_2_1_49_1","volume-title":"The devil is in the tails: Fine-grained classification in the wild. arXiv preprint arXiv:1709.01450","author":"Horn Grant Van","year":"2017","unstructured":"Grant Van Horn and Pietro Perona. 2017. The devil is in the tails: Fine-grained classification in the wild. arXiv preprint arXiv:1709.01450 (2017)."},{"key":"e_1_3_2_1_50_1","volume-title":"Knowledge distillation and student-teacher learning for visual intelligence: A review and new outlooks","author":"Wang Lin","year":"2021","unstructured":"Lin Wang and Kuk-Jin Yoon. 2021. Knowledge distillation and student-teacher learning for visual intelligence: A review and new outlooks. IEEE Transactions on Pattern Analysis and Machine Intelligence (2021)."},{"key":"e_1_3_2_1_51_1","volume-title":"Learning to model the tail. Advances in neural information processing systems","author":"Wang Yu-Xiong","year":"2017","unstructured":"Yu-Xiong Wang, Deva Ramanan, and Martial Hebert. 2017. Learning to model the tail. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11996"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3442381.3450129"},{"key":"e_1_3_2_1_54_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401320"},{"key":"e_1_3_2_1_55_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00396"},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3437963.3441822"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01165"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00974"},{"key":"e_1_3_2_1_59_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.ecnlp-1.17"}],"event":{"name":"CIKM '24: The 33rd ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval"],"location":"Boise ID USA","acronym":"CIKM '24"},"container-title":["Proceedings of the 33rd ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3680049","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3627673.3680049","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:17Z","timestamp":1750294697000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3627673.3680049"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,21]]},"references-count":59,"alternative-id":["10.1145\/3627673.3680049","10.1145\/3627673"],"URL":"https:\/\/doi.org\/10.1145\/3627673.3680049","relation":{},"subject":[],"published":{"date-parts":[[2024,10,21]]},"assertion":[{"value":"2024-10-21","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}