{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,30]],"date-time":"2026-01-30T05:37:54Z","timestamp":1769751474727,"version":"3.49.0"},"publisher-location":"New York, NY, USA","reference-count":43,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,7,20]],"date-time":"2025-07-20T00:00:00Z","timestamp":1752969600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,7,20]]},"DOI":"10.1145\/3690624.3709416","type":"proceedings-article","created":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T18:42:22Z","timestamp":1743792142000},"page":"2638-2647","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["HoME: Hierarchy of Multi-Gate Experts for Multi-Task Learning at Kuaishou"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0941-6232","authenticated-orcid":false,"given":"Xu","family":"Wang","sequence":"first","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2681-0119","authenticated-orcid":false,"given":"Jiangxia","family":"Cao","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0260-6404","authenticated-orcid":false,"given":"Zhiyi","family":"Fu","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3636-3618","authenticated-orcid":false,"given":"Kun","family":"Gai","sequence":"additional","affiliation":[{"name":"Unaffiliated, Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8550-279X","authenticated-orcid":false,"given":"Guorui","family":"Zhou","sequence":"additional","affiliation":[{"name":"Kuaishou Technology, Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,7,20]]},"reference":[{"key":"e_1_3_2_2_1_1","volume-title":"Jamie Ryan Kiros, and Geoffrey E Hinton","author":"Ba Jimmy Lei","year":"2016","unstructured":"Jimmy Lei Ba, Jamie Ryan Kiros, and Geoffrey E Hinton. 2016. Layer Normalization. arXiv (2016)."},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959180"},{"key":"e_1_3_2_2_3_1","volume-title":"Multitask Learning. Machine Learning","author":"Caruana Rich","year":"1997","unstructured":"Rich Caruana. 1997. Multitask Learning. Machine Learning (1997)."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599922"},{"key":"e_1_3_2_2_5_1","volume-title":"PEPNet: Parameter and Embedding Personalized Network for Infusing with Personalized Prior Information. In ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD).","author":"Chang Jianxin","year":"2023","unstructured":"Jianxin Chang, Chenbin Zhang, Yiqun Hui, Dewei Leng, Yanan Niu, Yang Song, and Kun Gai. 2023b. PEPNet: Parameter and Embedding Personalized Network for Infusing with Personalized Prior Information. In ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/1390156.1390177"},{"key":"e_1_3_2_2_7_1","volume-title":"Deep Neural Networks for YouTube Recommendations. In ACM Conference on Recommender Systems (RecSys).","author":"Covington Paul","year":"2016","unstructured":"Paul Covington, Jay Adams, and Emre Sargin. 2016. Deep Neural Networks for YouTube Recommendations. In ACM Conference on Recommender Systems (RecSys)."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"crossref","unstructured":"Damai Dai Chengqi Deng Chenggang Zhao RX Xu Huazuo Gao Deli Chen Jiashi Li Wangding Zeng Xingkai Yu Y Wu et al. 2024. DeepSeekMoE: Towards Ultimate Expert Specialization in Mixture-of-Experts Language Models. arXiv (2024).","DOI":"10.18653\/v1\/2024.acl-long.70"},{"key":"e_1_3_2_2_9_1","volume-title":"The YouTube Video Recommendation System. In ACM Conference on Recommender Systems (RecSys).","author":"Davidson James","year":"2010","unstructured":"James Davidson, Benjamin Liebald, Junning Liu, Palash Nandy, Taylor Van Vleet, Ullas Gargi, Sujoy Gupta, Yu He, Mike Lambert, Blake Livingston, and Dasarathi Sampath. 2010. The YouTube Video Recommendation System. In ACM Conference on Recommender Systems (RecSys)."},{"key":"e_1_3_2_2_10_1","volume-title":"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv (2019)."},{"key":"e_1_3_2_2_11_1","volume-title":"MSSM: A Multiple-level Sparse Sharing Model for Efficient Multi-Task Learning. In International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR).","author":"Ding Ke","year":"2021","unstructured":"Ke Ding, Xin Dong, Yong He, Lei Cheng, Chilin Fu, Zhaoxin Huan, Hai Li, Tan Yan, Liang Zhang, Xiaolu Zhang, et al. 2021. MSSM: A Multiple-level Sparse Sharing Model for Efficient Multi-Task Learning. In International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR)."},{"key":"e_1_3_2_2_12_1","volume-title":"HD-MTL: Hierarchical Deep Multi-Task Learning for Large-Scale Visual Recognition","author":"Fan Jianping","year":"2017","unstructured":"Jianping Fan, Tianyi Zhao, Zhenzhong Kuang, Yu Zheng, Ji Zhang, Jun Yu, and Jinye Peng. 2017. HD-MTL: Hierarchical Deep Multi-Task Learning for Large-Scale Visual Recognition. IEEE Transactions on Image Processing (TIP) (2017)."},{"key":"e_1_3_2_2_13_1","unstructured":"Joumana Ghosn and Yoshua Bengio. 1996. Multi-Task Learning for Stock Selection. In Advances in Neural Information Processing Systems (NeurIPS)."},{"key":"e_1_3_2_2_14_1","volume-title":"Negative and Neutral: Modeling Implicit Feedback in Session-based News Recommendation. In International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR).","author":"Gong Shansan","year":"2022","unstructured":"Shansan Gong and Kenny Q Zhu. 2022. Positive, Negative and Neutral: Modeling Implicit Feedback in Session-based News Recommendation. In International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR)."},{"key":"e_1_3_2_2_15_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. arXiv","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. LoRA: Low-Rank Adaptation of Large Language Models. arXiv (2021)."},{"key":"e_1_3_2_2_16_1","volume-title":"International Conference on Machine Learning (ICML).","author":"Ioffe Sergey","year":"2015","unstructured":"Sergey Ioffe and Christian Szegedy. 2015. Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift. In International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_2_17_1","volume-title":"Hinton","author":"Jacobs Robert A.","year":"1991","unstructured":"Robert A. Jacobs, Michael I. Jordan, Steven J. Nowlan, and Geoffrey E. Hinton. 1991. Adaptive Mixtures of Local Experts. Neural Computation (1991)."},{"key":"e_1_3_2_2_18_1","volume-title":"IEEE\/CVF Computer Vision and Pattern Recognition Conference (CVPR).","author":"Kendall Alex","year":"2018","unstructured":"Alex Kendall, Yarin Gal, and Roberto Cipolla. 2018. Multi-Task Learning Using Uncertainty to Weigh Losses for Scene Geometry and Semantics. In IEEE\/CVF Computer Vision and Pattern Recognition Conference (CVPR)."},{"key":"e_1_3_2_2_19_1","volume-title":"AdaTT: Adaptive Task-to-Task Fusion Network for Multitask Learning in Recommendations. In ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD).","author":"Li Danwei","year":"2023","unstructured":"Danwei Li, Zhengyu Zhang, Siyang Yuan, Mingze Gao, Weilin Zhang, Chaofei Yang, Xi Liu, and Jiyan Yang. 2023. AdaTT: Adaptive Task-to-Task Fusion Network for Multitask Learning in Recommendations. In ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)."},{"key":"e_1_3_2_2_20_1","volume-title":"Deep Task-specific Bottom Representation Network for Multi-Task Recommendation. In ACM International Conference on Information and Knowledge Management (CIKM).","author":"Liu Qi","year":"2023","unstructured":"Qi Liu, Zhilong Zhou, Gangwei Jiang, Tiezheng Ge, and Defu Lian. 2023. Deep Task-specific Bottom Representation Network for Multi-Task Recommendation. In ACM International Conference on Information and Knowledge Management (CIKM)."},{"key":"e_1_3_2_2_21_1","volume-title":"Sora: A Review on Background, Technology, Limitations, and Opportunities of Large Vision Models. arXiv","author":"Liu Yixin","year":"2024","unstructured":"Yixin Liu, Kai Zhang, Yuan Li, Zhiling Yan, Chujie Gao, Ruoxi Chen, Zhengqing Yuan, Yue Huang, Hanchi Sun, Jianfeng Gao, et al. 2024. Sora: A Review on Background, Technology, Limitations, and Opportunities of Large Vision Models. arXiv (2024)."},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/TITS.2016.2598356"},{"key":"e_1_3_2_2_23_1","volume-title":"Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts. In ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD).","author":"Ma Jiaqi","year":"2018","unstructured":"Jiaqi Ma, Zhe Zhao, Xinyang Yi, Jilin Chen, Lichan Hong, and Ed H Chi. 2018b. Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts. In ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210104"},{"key":"e_1_3_2_2_25_1","volume-title":"Cross-Stitch Networks for Multi-Task Learning. In IEEE\/CVF Computer Vision and Pattern Recognition Conference (CVPR).","author":"Misra Ishan","year":"2016","unstructured":"Ishan Misra, Abhinav Shrivastava, Abhinav Gupta, and Martial Hebert. 2016. Cross-Stitch Networks for Multi-Task Learning. In IEEE\/CVF Computer Vision and Pattern Recognition Conference (CVPR)."},{"key":"e_1_3_2_2_26_1","volume-title":"Multi-Task Learning of Hierarchical Vision-Language Representation. In IEEE\/CVF Computer Vision and Pattern Recognition Conference (CVPR).","author":"Nguyen Duy-Kien","year":"2019","unstructured":"Duy-Kien Nguyen and Takayuki Okatani. 2019. Multi-Task Learning of Hierarchical Vision-Language Representation. In IEEE\/CVF Computer Vision and Pattern Recognition Conference (CVPR)."},{"key":"e_1_3_2_2_27_1","volume-title":"Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction. In ACM International Conference on Information and Knowledge Management (CIKM).","author":"Pi Qi","year":"2020","unstructured":"Qi Pi, Guorui Zhou, Yujing Zhang, Zhe Wang, Lejian Ren, Ying Fan, Xiaoqiang Zhu, and Kun Gai. 2020. Search-based User Interest Modeling with Lifelong Sequential Behavior Data for Click-Through Rate Prediction. In ACM International Conference on Information and Knowledge Management (CIKM)."},{"key":"e_1_3_2_2_28_1","volume-title":"Searching for Activation Functions. arXiv","author":"Ramachandran Prajit","year":"2017","unstructured":"Prajit Ramachandran, Barret Zoph, and Quoc V Le. 2017. Searching for Activation Functions. arXiv (2017)."},{"key":"e_1_3_2_2_29_1","volume-title":"Sluice Networks: Learning What to Share Between Loosely Related Tasks. arXiv","author":"Ruder Sebastian","year":"2017","unstructured":"Sebastian Ruder, Joachim Bingel, Isabelle Augenstein, and Anders S\u00f8gaard. 2017. Sluice Networks: Learning What to Share Between Loosely Related Tasks. arXiv (2017)."},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33016949"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3481941"},{"key":"e_1_3_2_2_32_1","volume-title":"STEM: Unleashing the Power of Embeddings for Multi-task Recommendation. In AAAI Conference on Artificial Intelligence (AAAI).","author":"Su Liangcai","year":"2024","unstructured":"Liangcai Su, Junwei Pan, Ximei Wang, Xi Xiao, Shijie Quan, Xihua Chen, and Jie Jiang. 2024. STEM: Unleashing the Power of Embeddings for Multi-task Recommendation. In AAAI Conference on Artificial Intelligence (AAAI)."},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3383313.3412236"},{"key":"e_1_3_2_2_34_1","volume-title":"Deep Feedback Network for Recommendation. In International Joint Conference on Artificial Intelligence (IJCAI).","author":"Xie Ruobing","year":"2021","unstructured":"Ruobing Xie, Cheng Ling, Yalong Wang, Rui Wang, Feng Xia, and Leyu Lin. 2021. Deep Feedback Network for Recommendation. In International Joint Conference on Artificial Intelligence (IJCAI)."},{"key":"e_1_3_2_2_35_1","volume-title":"Trinity: Syncretizing Multi-\/Long-tail\/Long-term Interests All in One. arXiv","author":"Yan Jing","year":"2024","unstructured":"Jing Yan, Liu Jiang, Jianfei Cui, Zhichen Zhao, Xingyan Bin, Feng Zhang, and Zuotao Liu. 2024. Trinity: Syncretizing Multi-\/Long-tail\/Long-term Interests All in One. arXiv (2024)."},{"key":"e_1_3_2_2_36_1","volume-title":"Words: Trillion-Parameter Sequential Transducers for Generative Recommendations. arXiv","author":"Zhai Jiaqi","year":"2024","unstructured":"Jiaqi Zhai, Lucy Liao, Xing Liu, Yueming Wang, Rui Li, Xuan Cao, Leon Gao, Zhaojie Gong, Fangda Gu, Michael He, et al. 2024. Actions Speak Louder than Words: Trillion-Parameter Sequential Transducers for Generative Recommendations. arXiv (2024)."},{"key":"e_1_3_2_2_37_1","volume-title":"Multi-Behavior Collaborative Filtering with Partial Order Graph Convolutional Networks. arXiv","author":"Zhang Yijie","year":"2024","unstructured":"Yijie Zhang, Yuanchen Bei, Hao Chen, Qijie Shen, Zheng Yuan, Huan Gong, Senzhang Wang, Feiran Huang, and Xiao Huang. 2024a. Multi-Behavior Collaborative Filtering with Partial Order Graph Convolutional Networks. arXiv (2024)."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2021.3070203"},{"key":"e_1_3_2_2_39_1","volume-title":"SpeechLM: Enhanced Speech Pre-Training With Unpaired Textual Data","author":"Zhang Ziqiang","year":"2024","unstructured":"Ziqiang Zhang, Sanyuan Chen, Long Zhou, Yu Wu, Shuo Ren, Shujie Liu, Zhuoyuan Yao, Xun Gong, Lirong Dai, Jinyu Li, and Furu Wei. 2024b. SpeechLM: Enhanced Speech Pre-Training With Unpaired Textual Data. IEEE\/ACM Transactions on Audio, Speech and Language Processing (TASLP) (2024)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3346997"},{"key":"e_1_3_2_2_41_1","volume-title":"Deep Interest Evolution Network for Click-Through Rate Prediction. In AAAI Conference on Artificial Intelligence (AAAI).","author":"Zhou Guorui","year":"2019","unstructured":"Guorui Zhou, Na Mou, Ying Fan, Qi Pi, Weijie Bian, Chang Zhou, Xiaoqiang Zhu, and Kun Gai. 2019. Deep Interest Evolution Network for Click-Through Rate Prediction. In AAAI Conference on Artificial Intelligence (AAAI)."},{"key":"e_1_3_2_2_42_1","volume-title":"Deep Interest Network for Click-Through Rate Prediction. In ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD).","author":"Zhou Guorui","year":"2018","unstructured":"Guorui Zhou, Xiaoqiang Zhu, Chenru Song, Ying Fan, Han Zhu, Xiao Ma, Yanghui Yan, Junqi Jin, Han Li, and Kun Gai. 2018. Deep Interest Network for Click-Through Rate Prediction. In ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)."},{"key":"e_1_3_2_2_43_1","volume-title":"Exploring Training on Heterogeneous Data with Mixture of Low-rank Adapters. arXiv","author":"Zhou Yuhang","year":"2024","unstructured":"Yuhang Zhou, Zihua Zhao, Haolin Li, Siyuan Du, Jiangchao Yao, Ya Zhang, and Yanfeng Wang. 2024. Exploring Training on Heterogeneous Data with Mixture of Low-rank Adapters. arXiv (2024)."}],"event":{"name":"KDD '25: The 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining","location":"Toronto ON Canada","acronym":"KDD '25","sponsor":["SIGMOD ACM Special Interest Group on Management of Data","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data"]},"container-title":["Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709416","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3690624.3709416","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,16]],"date-time":"2025-08-16T15:34:38Z","timestamp":1755358478000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3690624.3709416"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,20]]},"references-count":43,"alternative-id":["10.1145\/3690624.3709416","10.1145\/3690624"],"URL":"https:\/\/doi.org\/10.1145\/3690624.3709416","relation":{},"subject":[],"published":{"date-parts":[[2025,7,20]]},"assertion":[{"value":"2025-07-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}