{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,10]],"date-time":"2026-05-10T00:35:19Z","timestamp":1778373319609,"version":"3.51.4"},"publisher-location":"New York, NY, USA","reference-count":41,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761561","type":"proceedings-article","created":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T01:03:42Z","timestamp":1762563822000},"page":"6193-6200","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["Climber: Toward Efficient Scaling Laws for Large Recommendation Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-9515-7226","authenticated-orcid":false,"given":"Songpei","family":"Xu","sequence":"first","affiliation":[{"name":"NetEase Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9170-1889","authenticated-orcid":false,"given":"Shijia","family":"Wang","sequence":"additional","affiliation":[{"name":"NetEase Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1263-5383","authenticated-orcid":false,"given":"Da","family":"Guo","sequence":"additional","affiliation":[{"name":"NetEase Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5687-1815","authenticated-orcid":false,"given":"Xianwen","family":"Guo","sequence":"additional","affiliation":[{"name":"NetEase Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3940-5449","authenticated-orcid":false,"given":"Qiang","family":"Xiao","sequence":"additional","affiliation":[{"name":"NetEase Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3214-1211","authenticated-orcid":false,"given":"Bin","family":"Huang","sequence":"additional","affiliation":[{"name":"NetEase Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7603-5098","authenticated-orcid":false,"given":"Guanlin","family":"Wu","sequence":"additional","affiliation":[{"name":"NetEase Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-7022-8023","authenticated-orcid":false,"given":"Chuanjiang","family":"Luo","sequence":"additional","affiliation":[{"name":"NetEase Cloud Music, Hangzhou, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Understanding scaling laws for recommendation models. arXiv preprint arXiv:2208.08489","author":"Ardalani Newsha","year":"2022","unstructured":"Newsha Ardalani, Carole-Jean Wu, Zeliang Chen, Bhargav Bhushanam, and Adnan Aziz. 2022. Understanding scaling laws for recommendation models. arXiv preprint arXiv:2208.08489 (2022)."},{"key":"e_1_3_2_1_2_1","volume-title":"Qwen-vl: A frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966","author":"Bai Jinze","year":"2023","unstructured":"Jinze Bai, Shuai Bai, Shusheng Yang, Shijie Wang, Sinan Tan, Peng Wang, Junyang Lin, Chang Zhou, and Jingren Zhou. 2023. Qwen-vl: A frontier large vision-language model with versatile abilities. arXiv preprint arXiv:2308.12966 (2023)."},{"key":"e_1_3_2_1_3_1","unstructured":"Xiao Bi Deli Chen Guanting Chen Shanhuang Chen Damai Dai Chengqi Deng Honghui Ding Kai Dong Qiushi Du Zhe Fu et al. 2024. Deepseek llm: Scaling open-source language models with longtermism. arXiv preprint arXiv:2401.02954 (2024)."},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3308558.3313641"},{"key":"e_1_3_2_1_5_1","volume-title":"https:\/\/adamcasson.com\/posts\/transformer-flops","author":"Casson Adam","year":"2023","unstructured":"Adam Casson. 2023. Transformer FLOPs. (2023). https:\/\/adamcasson.com\/posts\/transformer-flops"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599922"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"e_1_3_2_1_8_1","volume-title":"Flashattention: Fast and memory-efficient exact attention with io-awareness. Advances in neural information processing systems","author":"Dao Tri","year":"2022","unstructured":"Tri Dao, Dan Fu, Stefano Ermon, Atri Rudra, and Christopher R\u00e9. 2022. Flashattention: Fast and memory-efficient exact attention with io-awareness. Advances in neural information processing systems, Vol. 35 (2022), 16344-16359."},{"key":"e_1_3_2_1_9_1","volume-title":"Get More with LESS: Synthesizing Recurrence with KV Cache Compression for Efficient LLM Inference. arXiv preprint arXiv:2402.09398","author":"Dong Harry","year":"2024","unstructured":"Harry Dong, Xinyu Yang, Zhenyu Zhang, Zhangyang Wang, Yuejie Chi, and Beidi Chen. 2024. Get More with LESS: Synthesizing Recurrence with KV Cache Compression for Efficient LLM Inference. arXiv preprint arXiv:2402.09398 (2024)."},{"key":"e_1_3_2_1_10_1","volume-title":"Zhongzhou Liu, Kai Cheng, Qiushi Pan, Yi Quan Lee, Wanqi Xue, Tingjia Shen, et al.","author":"Guo Wei","year":"2024","unstructured":"Wei Guo, Hao Wang, Luankang Zhang, Jin Yao Chin, Zhongzhou Liu, Kai Cheng, Qiushi Pan, Yi Quan Lee, Wanqi Xue, Tingjia Shen, et al., 2024. Scaling New Frontiers: Insights into Large Recommendation Models. arXiv preprint arXiv:2412.00714 (2024)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.asoc.2018.05.012"},{"key":"e_1_3_2_1_12_1","volume-title":"Distilling the Knowledge in a Neural Network. arXiv preprint arXiv:1503.02531","author":"Hinton Geoffrey","year":"2015","unstructured":"Geoffrey Hinton. 2015. Distilling the Knowledge in a Neural Network. arXiv preprint arXiv:1503.02531 (2015)."},{"key":"e_1_3_2_1_13_1","volume-title":"Lisa Anne Hendricks, Johannes Welbl, Aidan Clark, et al.","author":"Hoffmann Jordan","year":"2022","unstructured":"Jordan Hoffmann, Sebastian Borgeaud, Arthur Mensch, Elena Buchatskaya, Trevor Cai, Eliza Rutherford, Diego de Las Casas, Lisa Anne Hendricks, Johannes Welbl, Aidan Clark, et al., 2022. Training compute-optimal large language models. arXiv preprint arXiv:2203.15556 (2022)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583434"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3534678.3539381"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00745"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3298689.3347043"},{"key":"e_1_3_2_1_18_1","volume-title":"Scaling laws for neural language models. arXiv preprint arXiv:2001.08361","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, Tom Henighan, Tom B Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, and Dario Amodei. 2020. Scaling laws for neural language models. arXiv preprint arXiv:2001.08361 (2020)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357814"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599519"},{"key":"e_1_3_2_1_21_1","unstructured":"Aixin Liu Bei Feng Bin Wang Bingxuan Wang Bo Liu Chenggang Zhao Chengqi Dengr Chong Ruan Damai Dai Daya Guo et al. 2024b. Deepseek-v2: A strong economical and efficient mixture-of-experts language model. arXiv preprint arXiv:2405.04434 (2024)."},{"key":"e_1_3_2_1_22_1","unstructured":"Aixin Liu Bei Feng Bing Xue Bingxuan Wang Bochao Wu Chengda Lu Chenggang Zhao Chengqi Deng Chenyu Zhang Chong Ruan et al. 2024c. Deepseek-v3 technical report. arXiv preprint arXiv:2412.19437 (2024)."},{"key":"e_1_3_2_1_23_1","volume-title":"Advances in Neural Information Processing Systems","volume":"36","author":"Liu Zichang","year":"2024","unstructured":"Zichang Liu, Aditya Desai, Fangshuo Liao, Weitao Wang, Victor Xie, Zhaozhuo Xu, Anastasios Kyrillidis, and Anshumali Shrivastava. 2024a. Scissorhands: Exploiting the persistence of importance hypothesis for llm kv cache compression at test time. Advances in Neural Information Processing Systems, Vol. 36 (2024)."},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557298"},{"key":"e_1_3_2_1_25_1","volume-title":"MARM: Unlocking the Future of Recommendation Systems through Memory Augmentation and Scalable Complexity. arXiv preprint arXiv:2411.09425","author":"Lv Xiao","year":"2024","unstructured":"Xiao Lv, Jiangxia Cao, Shijie Guan, Xiaoyou Zhou, Zhiguang Qi, Yaqiang Zang, Ming Li, Ben Wang, Kun Gai, and Guorui Zhou. 2024. MARM: Unlocking the Future of Recommendation Systems through Memory Augmentation and Scalable Complexity. arXiv preprint arXiv:2411.09425 (2024)."},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/2766462.2767755"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3533727"},{"key":"e_1_3_2_1_28_1","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J Liu. 2020. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of machine learning research, Vol. 21, 140 (2020), 1-67.","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3680030"},{"key":"e_1_3_2_1_30_1","volume-title":"Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, et al., 2023. Llama: Open and efficient foundation language models. arXiv preprint arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_31_1","volume-title":"RecSys Posters","volume":"75","author":"Turrin Roberto","year":"2015","unstructured":"Roberto Turrin, Massimo Quadrana, Andrea Condorelli, Roberto Pagano, Paolo Cremonesi, et al., 2015. 30Music Listening and Playlists Dataset. RecSys Posters, Vol. 75 (2015)."},{"key":"e_1_3_2_1_32_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3711896.3737212"},{"key":"e_1_3_2_1_34_1","volume-title":"Sparsity-Aware Personalized Pattern Extractor Network for Music Multi-task Learning. In International Conference on Database Systems for Advanced Applications. Springer, 352-363","author":"Wang Shijia","year":"2024","unstructured":"Shijia Wang, Yi Zheng, Qiang Xiao, Yilong Zhao, Qimeng Yang, and Chuanjiang Luo. 2024b. Sparsity-Aware Personalized Pattern Extractor Network for Music Multi-task Learning. In International Conference on Database Systems for Advanced Applications. Springer, 352-363."},{"key":"e_1_3_2_1_35_1","volume-title":"Scaling Laws for Online Advertisement Retrieval. arXiv preprint arXiv:2411.13322","author":"Wang Yunli","year":"2024","unstructured":"Yunli Wang, Zixuan Yang, Zhen Zhang, Zhiqiang Wang, Jian Yang, Shiyang Wen, Peng Jiang, and Kun Gai. 2024a. Scaling Laws for Online Advertisement Retrieval. arXiv preprint arXiv:2411.13322 (2024)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1145\/3340531.3412092"},{"key":"e_1_3_2_1_37_1","volume-title":"Cascading Multimodal Feature Enhanced Contrast Learning for Music Recommendation. In 2024 IEEE International Conference on Data Mining (ICDM). IEEE, 905-910","author":"Yang Qimeng","year":"2024","unstructured":"Qimeng Yang, Shijia Wang, Da Guo, Dongjin Yu, Qiang Xiao, Dongjing Wang, and Chuanjiang Luo. 2024. Cascading Multimodal Feature Enhanced Contrast Learning for Music Recommendation. In 2024 IEEE International Conference on Data Mining (ICDM). IEEE, 905-910."},{"key":"e_1_3_2_1_38_1","unstructured":"Jiaqi Zhai Lucy Liao Xing Liu Yueming Wang Rui Li Xuan Cao Leon Gao Zhaojie Gong Fangda Gu Michael He et al. 2024. Actions speak louder than words: Trillion-parameter sequential transducers for generative recommendations. arXiv preprint arXiv:2402.17152 (2024)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"e_1_3_2_1_40_1","volume-title":"Wukong: Towards a Scaling Law for Large-Scale Recommendation. arXiv preprint arXiv:2403.02545","author":"Zhang Buyun","year":"2024","unstructured":"Buyun Zhang, Liang Luo, Yuxin Chen, Jade Nie, Xi Liu, Daifeng Guo, Yanli Zhao, Shen Li, Yuchen Hao, Yantao Yao, et al., 2024b. Wukong: Towards a Scaling Law for Large-Scale Recommendation. arXiv preprint arXiv:2403.02545 (2024)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3640457.3688129"}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","location":"Seoul Republic of Korea","acronym":"CIKM '25","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761561","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T01:18:34Z","timestamp":1765502314000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761561"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":41,"alternative-id":["10.1145\/3746252.3761561","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761561","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}