{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T14:18:12Z","timestamp":1775225892862,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":55,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,4,30]],"date-time":"2023-04-30T00:00:00Z","timestamp":1682812800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,4,30]]},"DOI":"10.1145\/3543507.3583209","type":"proceedings-article","created":{"date-parts":[[2023,4,26]],"date-time":"2023-04-26T23:30:25Z","timestamp":1682551825000},"page":"801-811","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":24,"title":["Distillation from Heterogeneous Models for Top-K Recommendation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-5528-1426","authenticated-orcid":false,"given":"SeongKu","family":"Kang","sequence":"first","affiliation":[{"name":"Pohang University of Science and Technology, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8813-3179","authenticated-orcid":false,"given":"Wonbin","family":"Kweon","sequence":"additional","affiliation":[{"name":"Pohang University of Science and Technology, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2173-3476","authenticated-orcid":false,"given":"Dongha","family":"Lee","sequence":"additional","affiliation":[{"name":"Yonsei University, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3108-5601","authenticated-orcid":false,"given":"Jianxun","family":"Lian","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8608-8482","authenticated-orcid":false,"given":"Xing","family":"Xie","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7510-0255","authenticated-orcid":false,"given":"Hwanjo","family":"Yu","sequence":"additional","affiliation":[{"name":"Pohang University of Science and Technology, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2023,4,30]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Transferring inductive biases through knowledge distillation. arXiv preprint arXiv:2006.00555","author":"Abnar Samira","year":"2020","unstructured":"Samira Abnar, Mostafa Dehghani, and Willem Zuidema. 2020. Transferring inductive biases through knowledge distillation. arXiv preprint arXiv:2006.00555 (2020)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"crossref","unstructured":"Yoshua Bengio J\u00e9r\u00f4me Louradour Ronan Collobert and Jason Weston. 2009. Curriculum Learning. In ICML.","DOI":"10.1145\/1553374.1553380"},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"crossref","unstructured":"George Cazenavette Tongzhou Wang Antonio Torralba Alexei\u00a0A Efros and Jun-Yan Zhu. 2022. Dataset distillation by matching training trajectories. In CVPR. 4750\u20134759.","DOI":"10.1109\/CVPR52688.2022.01045"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"crossref","unstructured":"Yankai Chen Huifeng Guo Yingxue Zhang Chen Ma Ruiming Tang Jingjie Li and Irwin King. 2022. Learning Binarized Graph Representations with Multi-faceted Quantization Reinforcement for Top-K Recommendation. In KDD.","DOI":"10.1145\/3534678.3539452"},{"key":"e_1_3_2_2_5_1","volume-title":"Darkrank: Accelerating deep metric learning via cross sample similarities transfer. In AAAI.","author":"Chen Yuntao","year":"2018","unstructured":"Yuntao Chen, Naiyan Wang, and Zhaoxiang Zhang. 2018. Darkrank: Accelerating deep metric learning via cross sample similarities transfer. In AAAI."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Yudong Chen Xin Wang Miao Fan Jizhou Huang Shengwen Yang and Wenwu Zhu. 2021. Curriculum Meta-Learning for Next POI Recommendation. In KDD.","DOI":"10.1145\/3447548.3467132"},{"key":"e_1_3_2_2_7_1","unstructured":"Jingtao Ding Yuhan Quan Quanming Yao Yong Li and Depeng Jin. 2020. Simplify and Robustify Negative Sampling for Implicit Collaborative Filtering. In NeurIPS."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Takashi Fukuda Masayuki Suzuki Gakuto Kurata Samuel Thomas Jia Cui and Bhuvana Ramabhadran. 2017. Efficient Knowledge Distillation from an Ensemble of Teachers.. In Interspeech. 3697\u20133701.","DOI":"10.21437\/Interspeech.2017-614"},{"key":"e_1_3_2_2_9_1","unstructured":"Xiangnan He Kuan Deng Xiang Wang Yan Li Yongdong Zhang and Meng Wang. 2020. LightGCN: Simplifying and Powering Graph Convolution Network for Recommendation. In SIGIR."},{"key":"e_1_3_2_2_10_1","unstructured":"Xiangnan He Lizi Liao Hanwang Zhang Liqiang Nie Xia Hu and Tat-Seng Chua. 2017. Neural collaborative filtering. In WWW."},{"key":"e_1_3_2_2_11_1","volume-title":"Distilling the knowledge in a neural network. NIPS","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton, Oriol Vinyals, and Jeffrey Dean. 2015. Distilling the knowledge in a neural network. NIPS (2015)."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"crossref","unstructured":"Cheng-Kang Hsieh Longqi Yang Yin Cui Tsung-Yi Lin Serge Belongie and Deborah Estrin. 2017. Collaborative metric learning. In WWW.","DOI":"10.1145\/3038912.3052639"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"crossref","unstructured":"Lu Jiang Deyu Meng Qian Zhao Shiguang Shan and Alexander\u00a0G Hauptmann. 2015. Self-paced curriculum learning. In AAAI.","DOI":"10.1609\/aaai.v29i1.9608"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"crossref","unstructured":"Xiao Jin Baoyun Peng Yichao Wu Yu Liu Jiaheng Liu Ding Liang Junjie Yan and Xiaolin Hu. 2019. Knowledge distillation via route constrained optimization. In CVPR. 1345\u20131354.","DOI":"10.1109\/ICCV.2019.00143"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"crossref","unstructured":"SeongKu Kang Junyoung Hwang Wonbin Kweon and Hwanjo Yu. 2020. DE-RRD: A Knowledge Distillation Framework for Recommender System. In CIKM.","DOI":"10.1145\/3340531.3412005"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2021.08.060"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"crossref","unstructured":"SeongKu Kang Junyoung Hwang Wonbin Kweon and Hwanjo Yu. 2021. Topology Distillation for Recommender System. In KDD.","DOI":"10.1145\/3447548.3467319"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"crossref","unstructured":"SeongKu Kang Junyoung Hwang Dongha Lee and Hwanjo Yu. 2019. Semi-supervised learning for cross-domain recommendation to cold-start users. In CIKM.","DOI":"10.1145\/3357384.3357914"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"crossref","unstructured":"SeongKu Kang Dongha Lee Wonbin Kweon Junyoung Hwang and Hwanjo Yu. 2022. Consensus Learning from Heterogeneous Objectives for One-Class Collaborative Filtering. In WWW.","DOI":"10.1145\/3485447.3512070"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2021.107958"},{"key":"e_1_3_2_2_21_1","unstructured":"M Kumar Benjamin Packer and Daphne Koller. 2010. Self-paced learning for latent variable models. In NeurIPS."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"crossref","unstructured":"Wonbin Kweon SeongKu Kang and Hwanjo Yu. 2021. Bidirectional Distillation for Top-K Recommender System. In WWW.","DOI":"10.1145\/3442381.3449878"},{"key":"e_1_3_2_2_23_1","unstructured":"xu lan Xiatian Zhu and Shaogang Gong. 2018. Knowledge Distillation by On-the-Fly Native Ensemble. In NeurIPS."},{"key":"e_1_3_2_2_24_1","unstructured":"Dongha Lee SeongKu Kang Hyunjun Ju Chanyoung Park and Hwanjo Yu. 2021. Bootstrapping User and Item Representations for One-Class Collaborative Filtering. In SIGIR."},{"key":"e_1_3_2_2_25_1","unstructured":"Youngjune Lee and Kee-Eung Kim. 2021. Dual Correction Strategy for Ranking Distillation in Top-N Recommender System. In CIKM."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"crossref","unstructured":"Dawen Liang Rahul\u00a0G. Krishnan Matthew\u00a0D. Hoffman and Tony Jebara. 2018. Variational Autoencoders for Collaborative Filtering. In WWW.","DOI":"10.1145\/3178876.3186150"},{"key":"e_1_3_2_2_27_1","volume-title":"An easy-to-hard learning paradigm for multiple classes and multiple labels. The Journal of Machine Learning Research 18","author":"Liu Weiwei","year":"2017","unstructured":"Weiwei Liu, Ivor\u00a0W Tsang, and Klaus-Robert M\u00fcller. 2017. An easy-to-hard learning paradigm for multiple classes and multiple labels. The Journal of Machine Learning Research 18 (2017)."},{"key":"e_1_3_2_2_28_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2020.07.048"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"crossref","unstructured":"Sean MacAvaney Franco\u00a0Maria Nardini Raffaele Perego Nicola Tonellotto Nazli Goharian and Ophir Frieder. 2020. Training curricula for open domain answer re-ranking. In SIGIR. 529\u2013538.","DOI":"10.1145\/3397271.3401094"},{"key":"e_1_3_2_2_30_1","volume-title":"Analyzing and modeling rank data","author":"Marden I","unstructured":"John\u00a0I Marden. 1996. Analyzing and modeling rank data. CRC Press."},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3365375"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neunet.2019.01.012"},{"key":"e_1_3_2_2_33_1","volume-title":"Rankdistil: Knowledge distillation for ranking. In AISTATS. PMLR.","author":"Reddi Sashank","year":"2021","unstructured":"Sashank Reddi, Rama\u00a0Kumar Pasumarthi, Aditya Menon, Ankit\u00a0Singh Rawat, Felix Yu, Seungyeon Kim, Andreas Veit, and Sanjiv Kumar. 2021. Rankdistil: Knowledge distillation for ranking. In AISTATS. PMLR."},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"crossref","unstructured":"Steffen Rendle and Christoph Freudenthaler. 2014. Improving pairwise learning for item recommendation from implicit feedback. In WSDM.","DOI":"10.1145\/2556195.2556248"},{"key":"e_1_3_2_2_35_1","volume-title":"BPR: Bayesian personalized ranking from implicit feedback. In UAI.","author":"Rendle Steffen","year":"2009","unstructured":"Steffen Rendle, Christoph Freudenthaler, Zeno Gantner, and Lars Schmidt-Thieme. 2009. BPR: Bayesian personalized ranking from implicit feedback. In UAI."},{"key":"e_1_3_2_2_36_1","volume-title":"Carlo Gatta, and Yoshua Bengio","author":"Romero Adriana","year":"2014","unstructured":"Adriana Romero, Nicolas Ballas, Samira\u00a0Ebrahimi Kahou, Antoine Chassang, Carlo Gatta, and Yoshua Bengio. 2014. Fitnets: Hints for thin deep nets. arXiv preprint arXiv:1412.6550 (2014)."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2740908.2742726"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-86523-8_36"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"crossref","unstructured":"Jiaxi Tang and Ke Wang. 2018. Ranking distillation: Learning compact ranking models with high performance for recommender system. In KDD.","DOI":"10.1145\/3219819.3220021"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"crossref","unstructured":"Haoyu Wang Defu Lian and Yong Ge. 2019. Binarized collaborative filtering with distilling graph convolutional networks. In IJCAI.","DOI":"10.24963\/ijcai.2019\/667"},{"key":"e_1_3_2_2_41_1","volume-title":"Mulde: Multi-teacher knowledge distillation for low-dimensional knowledge graph embeddings. In WWW. 1716\u20131726.","author":"Wang Kai","year":"2021","unstructured":"Kai Wang, Yu Liu, Qian Ma, and Quan\u00a0Z Sheng. 2021. Mulde: Multi-teacher knowledge distillation for low-dimensional knowledge graph embeddings. In WWW. 1716\u20131726."},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","unstructured":"X. Wang Y. Chen and W. Zhu. 2021. A Survey on Curriculum Learning. IEEE Transactions on Pattern Analysis & Machine Intelligence (2021). https:\/\/doi.org\/10.1109\/TPAMI.2021.3069908","DOI":"10.1109\/TPAMI.2021.3069908"},{"key":"e_1_3_2_2_43_1","unstructured":"Markus Weimer Alexandros Karatzoglou Quoc Le and Alex Smola. 2007. COFI RANK - Maximum Margin Matrix Factorization for Collaborative Ranking. In NIPS."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.387"},{"key":"e_1_3_2_2_45_1","volume-title":"When do curricula work?ICLR","author":"Wu Xiaoxia","year":"2021","unstructured":"Xiaoxia Wu, Ethan Dyer, and Behnam Neyshabur. 2021. When do curricula work?ICLR (2021)."},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"crossref","unstructured":"Fen Xia Tie-Yan Liu Jue Wang Wensheng Zhang and Hang Li. 2008. Listwise approach to learning to rank: theory and algorithm. In ICML.","DOI":"10.1145\/1390156.1390306"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"crossref","unstructured":"Xin Xia Hongzhi Yin Junliang Yu Qinyong Wang Guandong Xu and Quoc Viet\u00a0Hung Nguyen. 2022. On-Device Next-Item Recommendation with Self-Supervised Knowledge Distillation. In SIGIR.","DOI":"10.1145\/3477495.3531775"},{"key":"e_1_3_2_2_48_1","volume-title":"Learning from multiple experts: Self-paced knowledge distillation for long-tailed classification","author":"Xiang Liuyu","unstructured":"Liuyu Xiang, Guiguang Ding, and Jungong Han. 2020. Learning from multiple experts: Self-paced knowledge distillation for long-tailed classification. In ECCV. Springer, 247\u2013263."},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"crossref","unstructured":"Shan You Chang Xu Chao Xu and Dacheng Tao. 2017. Learning from multiple teacher networks. In KDD. 1285\u20131294.","DOI":"10.1145\/3097983.3098135"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"crossref","unstructured":"Fei Yuan Linjun Shou Jian Pei Wutao Lin Ming Gong Yan Fu and Daxin Jiang. 2021. Reinforced multi-teacher selection for knowledge distillation. In AAAI.","DOI":"10.1609\/aaai.v35i16.17680"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"crossref","unstructured":"Hansi Zeng Hamed Zamani and Vishwa Vinay. 2022. Curriculum Learning for Dense Retrieval Distillation. In SIGIR. 1979\u20131983.","DOI":"10.1145\/3477495.3531791"},{"key":"e_1_3_2_2_52_1","unstructured":"Tianyi Zhou Shengjie Wang and Jeff Bilmes. 2020. Time-consistent self-supervision for semi-supervised learning. In ICML."},{"key":"e_1_3_2_2_53_1","unstructured":"Jieming Zhu Jinyang Liu Weiqi Li Jincai Lai Xiuqiang He Liang Chen and Zibin Zheng. 2020. Ensembled CTR Prediction via Knowledge Distillation. In CIKM."},{"key":"e_1_3_2_2_54_1","unstructured":"Qingqing Zhu Xiuying Chen Pengfei Wu JunFei Liu and Dongyan Zhao. 2021. Combining Curriculum Learning and Knowledge Distillation for Dialogue Generation. In EMNLP."},{"key":"e_1_3_2_2_55_1","doi-asserted-by":"crossref","unstructured":"Ziwei Zhu Jianling Wang and James Caverlee. 2019. Improving Top-k Recommendation via Joint Collaborative Autoencoders. In WWW.","DOI":"10.1145\/3308558.3313678"}],"event":{"name":"WWW '23: The ACM Web Conference 2023","location":"Austin TX USA","acronym":"WWW '23","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2023"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583209","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3543507.3583209","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T18:09:18Z","timestamp":1750183758000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3543507.3583209"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,4,30]]},"references-count":55,"alternative-id":["10.1145\/3543507.3583209","10.1145\/3543507"],"URL":"https:\/\/doi.org\/10.1145\/3543507.3583209","relation":{},"subject":[],"published":{"date-parts":[[2023,4,30]]},"assertion":[{"value":"2023-04-30","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}