{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,4]],"date-time":"2026-05-04T05:47:38Z","timestamp":1777873658490,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":50,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,8,3]]},"DOI":"10.1145\/3711896.3737231","type":"proceedings-article","created":{"date-parts":[[2025,8,3]],"date-time":"2025-08-03T21:07:39Z","timestamp":1754255259000},"page":"5182-5193","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Hardness-aware Privileged Features Distillation with Latent Alignment for CVR Prediction"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3438-3535","authenticated-orcid":false,"given":"Huining","family":"Yuan","sequence":"first","affiliation":[{"name":"ByteDance Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3796-161X","authenticated-orcid":false,"given":"Wenpeng","family":"Zhang","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0615-5972","authenticated-orcid":false,"given":"Zijie","family":"Hao","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8286-0255","authenticated-orcid":false,"given":"Zengde","family":"Deng","sequence":"additional","affiliation":[{"name":"ByteDance Inc., Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,8,3]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.3233\/FAIA200188"},{"key":"e_1_3_2_2_2_1","volume-title":"The use of the area under the ROC curve in the evaluation of machine learning algorithms. Pattern recognition","author":"Bradley Andrew P","year":"1997","unstructured":"Andrew P Bradley. 1997. The use of the area under the ROC curve in the evaluation of machine learning algorithms. Pattern recognition, Vol. 30, 7 (1997), 1145-1159."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5746"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01163"},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i8.16865"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00497"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/2988450.2988454"},{"key":"e_1_3_2_2_8_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Chung Inseop","year":"2020","unstructured":"Inseop Chung, SeongUk Park, Jangho Kim, and Nojun Kwak. 2020. Feature-map-level online adversarial knowledge distillation. In International Conference on Machine Learning. PMLR, 2006-2015."},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959190"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01453-z"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3616855.3635810"},{"key":"e_1_3_2_2_12_1","volume-title":"International conference on machine learning. PMLR, 1321-1330","author":"Guo Chuan","year":"2017","unstructured":"Chuan Guo, Geoff Pleiss, Yu Sun, and Kilian Q Weinberger. 2017a. On calibration of modern neural networks. In International conference on machine learning. PMLR, 1321-1330."},{"key":"e_1_3_2_2_13_1","unstructured":"Huifeng Guo Ruiming Tang Yunming Ye Zhenguo Li and Xiuqiang He. 2017b. DeepFM: A Factorization-Machine based Neural Network for CTR Prediction. arXiv:1703.04247 [cs.IR] https:\/\/arxiv.org\/abs\/1703.04247"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01103"},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00201"},{"key":"e_1_3_2_2_16_1","unstructured":"Geoffrey Hinton Oriol Vinyals and Jeff Dean. 2015. Distilling the Knowledge in a Neural Network. arXiv:1503.02531 [stat.ML] https:\/\/arxiv.org\/abs\/1503.02531"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.04.112"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i9.16969"},{"key":"e_1_3_2_2_19_1","first-page":"16468","article-title":"Instance-conditional knowledge distillation for object detection","volume":"34","author":"Kang Zijian","year":"2021","unstructured":"Zijian Kang, Peizhen Zhang, Xiangyu Zhang, Jian Sun, and Nanning Zheng. 2021. Instance-conditional knowledge distillation for object detection. Advances in Neural Information Processing Systems, Vol. 34 (2021), 16468-16480.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_20_1","volume-title":"Kingma and Jimmy Ba","author":"Diederik","year":"2017","unstructured":"Diederik P. Kingma and Jimmy Ba. 2017. Adam: A Method for Stochastic Optimization. arXiv:1412.6980 [cs.LG] https:\/\/arxiv.org\/abs\/1412.6980"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3592079"},{"key":"e_1_3_2_2_22_1","volume-title":"Distilling a powerful student model via online knowledge distillation","author":"Li Shaojie","year":"2022","unstructured":"Shaojie Li, Mingbao Lin, Yan Wang, Yongjian Wu, Yonghong Tian, Ling Shao, and Rongrong Ji. 2022. Distilling a powerful student model via online knowledge distillation. IEEE transactions on neural networks and learning systems, Vol. 34, 11 (2022), 8743-8752."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-65414-6_13"},{"key":"e_1_3_2_2_24_1","unstructured":"Tsung-Yi Lin Priya Goyal Ross Girshick Kaiming He and Piotr Doll\u00e1r. 2018. Focal Loss for Dense Object Detection. arXiv:1708.02002 [cs.CV] https:\/\/arxiv.org\/abs\/1708.02002"},{"key":"e_1_3_2_2_25_1","unstructured":"Dongyang Liu Meina Kan Shiguang Shan and Xilin Chen. 2023. Function-Consistent Feature Distillation. arXiv:2304.11832 [cs.CV] https:\/\/arxiv.org\/abs\/2304.11832"},{"key":"e_1_3_2_2_26_1","first-page":"15682","article-title":"Revisiting the calibration of modern neural networks","volume":"34","author":"Minderer Matthias","year":"2021","unstructured":"Matthias Minderer, Josip Djolonga, Rob Romijnders, Frances Hubis, Xiaohua Zhai, Neil Houlsby, Dustin Tran, and Mario Lucic. 2021. Revisiting the calibration of modern neural networks. Advances in Neural Information Processing Systems, Vol. 34 (2021), 15682-15694.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i04.5963"},{"key":"e_1_3_2_2_28_1","first-page":"15288","article-title":"Calibrating deep neural networks using focal loss","volume":"33","author":"Mukhoti Jishnu","year":"2020","unstructured":"Jishnu Mukhoti, Viveka Kulharia, Amartya Sanyal, Stuart Golodetz, Philip Torr, and Puneet Dokania. 2020. Calibrating deep neural networks using focal loss. Advances in Neural Information Processing Systems, Vol. 33 (2020), 15288-15299.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00409"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00511"},{"key":"e_1_3_2_2_31_1","volume-title":"Antoine Chassang, Carlo Gatta, and Yoshua Bengio.","author":"Romero Adriana","year":"2015","unstructured":"Adriana Romero, Nicolas Ballas, Samira Ebrahimi Kahou, Antoine Chassang, Carlo Gatta, and Yoshua Bengio. 2015. FitNets: Hints for Thin Deep Nets. arXiv:1412.6550 [cs.LG] https:\/\/arxiv.org\/abs\/1412.6550"},{"key":"e_1_3_2_2_32_1","volume-title":"Shamir and Dong Lin","author":"Gil","year":"2022","unstructured":"Gil I. Shamir and Dong Lin. 2022. Real World Large Scale Recommendation Systems Reproducibility and Smooth Activations. arXiv:2202.06499 [cs.LG] https:\/\/arxiv.org\/abs\/2202.06499"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599851"},{"key":"e_1_3_2_2_34_1","volume-title":"International Conference on Machine Learning. PMLR, 33833-33849","author":"Tao Linwei","year":"2023","unstructured":"Linwei Tao, Minjing Dong, and Chang Xu. 2023. Dual focal loss for calibration. In International Conference on Machine Learning. PMLR, 33833-33849."},{"key":"e_1_3_2_2_35_1","unstructured":"Yonglong Tian Dilip Krishnan and Phillip Isola. 2022. Contrastive Representation Distillation. arXiv:1910.10699 [cs.LG] https:\/\/arxiv.org\/abs\/1910.10699"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00145"},{"key":"e_1_3_2_2_37_1","first-page":"6305","article-title":"SemCKD: Semantic calibration for cross-layer knowledge distillation","volume":"35","author":"Wang Can","year":"2022","unstructured":"Can Wang, Defang Chen, Jian-Ping Mei, Yuan Zhang, Yan Feng, and Chun Chen. 2022a. SemCKD: Semantic calibration for cross-layer knowledge distillation. IEEE Transactions on Knowledge and Data Engineering, Vol. 35, 6 (2022), 6305-6319.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557464"},{"key":"e_1_3_2_2_39_1","first-page":"8183","article-title":"Distilling knowledge by mimicking features","volume":"44","author":"Wang Guo-Hua","year":"2021","unstructured":"Guo-Hua Wang, Yifan Ge, and Jianxin Wu. 2021. Distilling knowledge by mimicking features. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 11 (2021), 8183-8195.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_2_40_1","volume-title":"Yi Wong, Ziru Liu, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang.","author":"Wang Yuhao","year":"2023","unstructured":"Yuhao Wang, Ha Tsz Lam, Yi Wong, Ziru Liu, Xiangyu Zhao, Yichao Wang, Bo Chen, Huifeng Guo, and Ruiming Tang. 2023. Multi-Task Deep Recommender Systems: A Survey. arXiv:2302.03525 [cs.IR] https:\/\/arxiv.org\/abs\/2302.03525"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3403309"},{"key":"e_1_3_2_2_42_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i4.20352"},{"key":"e_1_3_2_2_43_1","first-page":"26658","article-title":"Toward understanding privileged features distillation in learning-to-rank","volume":"35","author":"Yang Shuo","year":"2022","unstructured":"Shuo Yang, Sujay Sanghavi, Holakou Rahmanian, Jan Bakus, and Vishwanathan SVN. 2022b. Toward understanding privileged features distillation in learning-to-rank. Advances in Neural Information Processing Systems, Vol. 35 (2022), 26658-26670.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2021.102853"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.754"},{"key":"e_1_3_2_2_46_1","unstructured":"Sergey Zagoruyko and Nikos Komodakis. 2017. Paying More Attention to Attention: Improving the Performance of Convolutional Neural Networks via Attention Transfer. arXiv:1612.03928 [cs.CV] https:\/\/arxiv.org\/abs\/1612.03928"},{"key":"e_1_3_2_2_47_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00454"},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3070203"},{"key":"e_1_3_2_2_49_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01165"},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1145\/3219819.3219823"}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGMOD ACM Special Interest Group on Management of Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.2"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3711896.3737231","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T18:09:37Z","timestamp":1777572577000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3711896.3737231"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,3]]},"references-count":50,"alternative-id":["10.1145\/3711896.3737231","10.1145\/3711896"],"URL":"https:\/\/doi.org\/10.1145\/3711896.3737231","relation":{},"subject":[],"published":{"date-parts":[[2025,8,3]]},"assertion":[{"value":"2025-08-03","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}