{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,10]],"date-time":"2026-04-10T10:07:43Z","timestamp":1775815663104,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":52,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T00:00:00Z","timestamp":1728345600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,8]]},"DOI":"10.1145\/3640457.3688129","type":"proceedings-article","created":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T15:39:28Z","timestamp":1728401968000},"page":"444-453","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":20,"title":["Scaling Law of Large Sequential Recommendation Models"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8112-4552","authenticated-orcid":false,"given":"Gaowei","family":"Zhang","sequence":"first","affiliation":[{"name":"GSAI, Renmin University of China, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0747-8010","authenticated-orcid":false,"given":"Yupeng","family":"Hou","sequence":"additional","affiliation":[{"name":"University of California San Diego, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0247-2496","authenticated-orcid":false,"given":"Hongyu","family":"Lu","sequence":"additional","affiliation":[{"name":"Wechat, Tencent, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-0726-2727","authenticated-orcid":false,"given":"Yu","family":"Chen","sequence":"additional","affiliation":[{"name":"WeChat, Tencent, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8333-6196","authenticated-orcid":false,"given":"Wayne Xin","family":"Zhao","sequence":"additional","affiliation":[{"name":"GSAI, Renmin University of China, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9777-9676","authenticated-orcid":false,"given":"Ji-Rong","family":"Wen","sequence":"additional","affiliation":[{"name":"GSAI, Renmin University of China, China"}]}],"member":"320","published-online":{"date-parts":[[2024,10,8]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Understanding Scaling Laws for Recommendation Models. arXiv:2208.08489","author":"Ardalani Newsha","year":"2022","unstructured":"Newsha Ardalani, Carole-Jean Wu, Zeliang Chen, Bhargav Bhushanam, and Adnan Aziz. 2022. Understanding Scaling Laws for Recommendation Models. arXiv:2208.08489 (2022)."},{"key":"e_1_3_2_1_2_1","volume-title":"Adaptive input representations for neural language modeling. arXiv:1809.10853","author":"Baevski Alexei","year":"2018","unstructured":"Alexei Baevski and Michael Auli. 2018. Adaptive input representations for neural language modeling. arXiv:1809.10853 (2018)."},{"key":"e_1_3_2_1_3_1","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown Tom","year":"2020","unstructured":"Tom Brown, Benjamin Mann, Nick Ryder, Melanie Subbiah, Jared\u00a0D Kaplan, Prafulla Dhariwal, Arvind Neelakantan, Pranav Shyam, Girish Sastry, Amanda Askell, 2020. Language models are few-shot learners. NeurIPS 33 (2020), 1877\u20131901.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"crossref","unstructured":"Jiangxia Cao Xin Cong Jiawei Sheng Tingwen Liu and Bin Wang. 2022. Contrastive Cross-Domain Sequential Recommendation. In CIKM. 138\u2013147.","DOI":"10.1145\/3511808.3557262"},{"key":"e_1_3_2_1_5_1","volume-title":"Quantifying memorization across neural language models. arXiv:2202.07646","author":"Carlini Nicholas","year":"2022","unstructured":"Nicholas Carlini, Daphne Ippolito, Matthew Jagielski, Katherine Lee, Florian Tramer, and Chiyuan Zhang. 2022. Quantifying memorization across neural language models. arXiv:2202.07646 (2022)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"crossref","unstructured":"Jianxin Chang Chen Gao Yu Zheng Yiqun Hui Yanan Niu Yang Song Depeng Jin and Yong Li. 2021. Sequential recommendation with graph neural networks. In SIGIR. 378\u2013387.","DOI":"10.1145\/3404835.3462968"},{"key":"e_1_3_2_1_7_1","unstructured":"Sharad Chitlangia Krishna\u00a0Reddy Kesari and Rajat Agarwal. 2023. Scaling generative pre-training for user ad activity sequences. (2023)."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"crossref","unstructured":"Xinyan Fan Zheng Liu Jianxun Lian Wayne\u00a0Xin Zhao Xing Xie and Ji-Rong Wen. 2021. Lighter and better: low-rank decomposed self-attention networks for next-item recommendation. In SIGIR. 1733\u20131737.","DOI":"10.1145\/3404835.3462978"},{"key":"e_1_3_2_1_9_1","first-page":"23","article-title":"A new algorithm for data compression","volume":"12","author":"Gage Philip","year":"1994","unstructured":"Philip Gage. 1994. A new algorithm for data compression. C Users Journal 12, 2 (1994), 23\u201338.","journal-title":"C Users Journal"},{"key":"e_1_3_2_1_10_1","volume-title":"Multimodal-gpt: A vision and language model for dialogue with humans. arXiv:2305.04790","author":"Gong Tao","year":"2023","unstructured":"Tao Gong, Chengqi Lyu, Shilong Zhang, Yudong Wang, Miao Zheng, Qian Zhao, Kuikun Liu, Wenwei Zhang, Ping Luo, and Kai Chen. 2023. Multimodal-gpt: A vision and language model for dialogue with humans. arXiv:2305.04790 (2023)."},{"key":"e_1_3_2_1_11_1","volume-title":"On the Embedding Collapse when Scaling up Recommendation Models. arXiv:2310.04400","author":"Guo Xingzhuo","year":"2023","unstructured":"Xingzhuo Guo, Junwei Pan, Ximei Wang, Baixu Chen, Jie Jiang, and Mingsheng Long. 2023. On the Embedding Collapse when Scaling up Recommendation Models. arXiv:2310.04400 (2023)."},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/2827872"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3397271.3401063"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3209981"},{"key":"e_1_3_2_1_16_1","volume-title":"Scaling laws for autoregressive generative modeling. arXiv:2010.14701","author":"Henighan Tom","year":"2020","unstructured":"Tom Henighan, Jared Kaplan, Mor Katz, Mark Chen, Christopher Hesse, Jacob Jackson, Heewoo Jun, Tom\u00a0B Brown, Prafulla Dhariwal, Scott Gray, 2020. Scaling laws for autoregressive generative modeling. arXiv:2010.14701 (2020)."},{"key":"e_1_3_2_1_17_1","volume-title":"Session-based recommendations with recurrent neural networks. arXiv:1511.06939","author":"Hidasi Bal\u00e1zs","year":"2015","unstructured":"Bal\u00e1zs Hidasi, Alexandros Karatzoglou, Linas Baltrunas, and Domonkos Tikk. 2015. Session-based recommendations with recurrent neural networks. arXiv:1511.06939 (2015)."},{"key":"e_1_3_2_1_18_1","volume-title":"Lisa\u00a0Anne Hendricks","author":"Hoffmann Jordan","year":"2022","unstructured":"Jordan Hoffmann, Sebastian Borgeaud, Arthur Mensch, Elena Buchatskaya, Trevor Cai, Eliza Rutherford, Diego de\u00a0Las Casas, Lisa\u00a0Anne Hendricks, Johannes Welbl, Aidan Clark, 2022. Training compute-optimal large language models. arXiv:2203.15556 (2022)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"crossref","unstructured":"Yupeng Hou Zhankui He Julian McAuley and Wayne\u00a0Xin Zhao. 2023. Learning vector-quantized item representation for transferable sequential recommenders. In WWW. 1162\u20131171.","DOI":"10.1145\/3543507.3583434"},{"key":"e_1_3_2_1_20_1","volume-title":"CORE: Simple and Effective Session-based Recommendation within Consistent Representation Space. In SIGIR.","author":"Hou Yupeng","year":"2022","unstructured":"Yupeng Hou, Binbin Hu, Zhiqiang Zhang, and Wayne\u00a0Xin Zhao. 2022. CORE: Simple and Effective Session-based Recommendation within Consistent Representation Space. In SIGIR."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"crossref","unstructured":"Yupeng Hou Shanlei Mu Wayne\u00a0Xin Zhao Yaliang Li Bolin Ding and Ji-Rong Wen. 2022. Towards universal sequence representation learning for recommender systems. In KDD. 585\u2013593.","DOI":"10.1145\/3534678.3539381"},{"key":"e_1_3_2_1_22_1","volume-title":"Self-attentive sequential recommendation","author":"Kang Wang-Cheng","unstructured":"Wang-Cheng Kang and Julian McAuley. 2018. Self-attentive sequential recommendation. In ICDM. IEEE, 197\u2013206."},{"key":"e_1_3_2_1_23_1","volume-title":"Scaling laws for neural language models. arXiv:2001.08361","author":"Kaplan Jared","year":"2020","unstructured":"Jared Kaplan, Sam McCandlish, Tom Henighan, Tom\u00a0B Brown, Benjamin Chess, Rewon Child, Scott Gray, Alec Radford, Jeffrey Wu, and Dario Amodei. 2020. Scaling laws for neural language models. arXiv:2001.08361 (2020)."},{"key":"e_1_3_2_1_24_1","volume-title":"Improving generalization performance by switching from adam to sgd. arXiv preprint arXiv:1712.07628","author":"Keskar Nitish\u00a0Shirish","year":"2017","unstructured":"Nitish\u00a0Shirish Keskar and Richard Socher. 2017. Improving generalization performance by switching from adam to sgd. arXiv preprint arXiv:1712.07628 (2017)."},{"key":"e_1_3_2_1_25_1","volume-title":"Adam: A method for stochastic optimization. arXiv:1412.6980","author":"Kingma P","year":"2014","unstructured":"Diederik\u00a0P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_26_1","volume-title":"Text Is All You Need: Learning Language Representations for Sequential Recommendation. arXiv:2305.13731","author":"Li Jiacheng","year":"2023","unstructured":"Jiacheng Li, Ming Wang, Jin Li, Jinmiao Fu, Xin Shen, Jingbo Shang, and Julian McAuley. 2023. Text Is All You Need: Learning Language Representations for Sequential Recommendation. arXiv:2305.13731 (2023)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"crossref","unstructured":"Yang Li Tong Chen Peng-Fei Zhang and Hongzhi Yin. 2021. Lightweight self-attentive sequential recommendation. In CIKM. 967\u2013977.","DOI":"10.1145\/3459637.3482448"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"crossref","unstructured":"Zihan Lin Changxin Tian Yupeng Hou and Wayne\u00a0Xin Zhao. 2022. Improving graph collaborative filtering with neighborhood-enriched contrastive learning. In WWW. 2320\u20132329.","DOI":"10.1145\/3485447.3512104"},{"key":"e_1_3_2_1_29_1","volume-title":"Dropout Reduces Underfitting. arXiv:2303.01500","author":"Liu Zhuang","year":"2023","unstructured":"Zhuang Liu, Zhiqiu Xu, Joseph Jin, Zhiqiang Shen, and Trevor Darrell. 2023. Dropout Reduces Underfitting. arXiv:2303.01500 (2023)."},{"key":"e_1_3_2_1_30_1","volume-title":"Neural networks for optimal approximation of smooth and analytic functions. Neural computation 8, 1","author":"Mhaskar N","year":"1996","unstructured":"Hrushikesh\u00a0N Mhaskar. 1996. Neural networks for optimal approximation of smooth and analytic functions. Neural computation 8, 1 (1996), 164\u2013177."},{"key":"e_1_3_2_1_31_1","volume-title":"Scaling Data-Constrained Language Models. arXiv:2305.16264","author":"Muennighoff Niklas","year":"2023","unstructured":"Niklas Muennighoff, Alexander\u00a0M Rush, Boaz Barak, Teven\u00a0Le Scao, Aleksandra Piktus, Nouamane Tazi, Sampo Pyysalo, Thomas Wolf, and Colin Raffel. 2023. Scaling Data-Constrained Language Models. arXiv:2305.16264 (2023)."},{"key":"e_1_3_2_1_32_1","unstructured":"Jianmo Ni Jiacheng Li and Julian McAuley. 2019. Justifying recommendations using distantly-labeled reviews and fine-grained aspects. In EMNLP-IJCNLP. 188\u2013197."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"crossref","unstructured":"John O\u2019Donovan and Barry Smyth. 2005. Trust in recommender systems. In IUI. 167\u2013174.","DOI":"10.1145\/1040830.1040870"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/1031114.1031116"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"crossref","unstructured":"Steffen Rendle Christoph Freudenthaler and Lars Schmidt-Thieme. 2010. Factorizing personalized markov chains for next-basket recommendation. In WWW. 811\u2013820.","DOI":"10.1145\/1772690.1772773"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"crossref","unstructured":"Kyuyong Shin Hanock Kwak Su\u00a0Young Kim Max\u00a0Nihl\u00e9n Ramstr\u00f6m Jisu Jeong Jung-Woo Ha and Kyung-Min Kim. 2023. Scaling law for recommendation models: Towards general-purpose user representations. In AAAI Vol.\u00a037. 4596\u20134604.","DOI":"10.1609\/aaai.v37i4.25582"},{"key":"e_1_3_2_1_38_1","volume-title":"Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1","author":"Srivastava Nitish","year":"2014","unstructured":"Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever, and Ruslan Salakhutdinov. 2014. Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research 15, 1 (2014), 1929\u20131958."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"crossref","unstructured":"Fei Sun Jun Liu Jian Wu Changhua Pei Xiao Lin Wenwu Ou and Peng Jiang. 2019. BERT4Rec: Sequential recommendation with bidirectional encoder representations from transformer. In CIKM. 1441\u20131450.","DOI":"10.1145\/3357384.3357895"},{"key":"e_1_3_2_1_40_1","volume-title":"Towards More Robust and Accurate Sequential Recommendation with Cascade-guided Adversarial Training. arXiv:2304.05492","author":"Tan Juntao","year":"2023","unstructured":"Juntao Tan, Shelby Heinecke, Zhiwei Liu, Yongjun Chen, Yongfeng Zhang, and Huan Wang. 2023. Towards More Robust and Accurate Sequential Recommendation with Cascade-guided Adversarial Training. arXiv:2304.05492 (2023)."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"crossref","unstructured":"Jiaxi Tang and Ke Wang. 2018. Personalized top-n sequential recommendation via convolutional sequence embedding. In WSDM. 565\u2013573.","DOI":"10.1145\/3159652.3159656"},{"key":"e_1_3_2_1_42_1","volume-title":"One Model for All: Large Language Models are Domain-Agnostic Recommendation Systems. arXiv:2310.14304","author":"Tang Zuoli","year":"2023","unstructured":"Zuoli Tang, Zhaoxin Huan, Zihao Li, Xiaolu Zhang, Jun Hu, Chilin Fu, Jun Zhou, and Chenliang Li. 2023. One Model for All: Large Language Models are Domain-Agnostic Recommendation Systems. arXiv:2310.14304 (2023)."},{"key":"e_1_3_2_1_43_1","first-page":"38274","article-title":"Memorization without overfitting: Analyzing the training dynamics of large language models","volume":"35","author":"Tirumala Kushal","year":"2022","unstructured":"Kushal Tirumala, Aram Markosyan, Luke Zettlemoyer, and Armen Aghajanyan. 2022. Memorization without overfitting: Analyzing the training dynamics of large language models. NeurIPS 35 (2022), 38274\u201338290.","journal-title":"NeurIPS"},{"key":"e_1_3_2_1_44_1","volume-title":"Llama: Open and efficient foundation language models. arXiv:2302.13971","author":"Touvron Hugo","year":"2023","unstructured":"Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timoth\u00e9e Lacroix, Baptiste Rozi\u00e8re, Naman Goyal, Eric Hambro, Faisal Azhar, 2023. Llama: Open and efficient foundation language models. arXiv:2302.13971 (2023)."},{"key":"e_1_3_2_1_45_1","volume-title":"Attention is all you need. NeurIPS 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. NeurIPS 30 (2017)."},{"key":"e_1_3_2_1_46_1","volume-title":"Transformers: State-of-the-Art Natural Language Processing","author":"Wolf Thomas","year":"2020","unstructured":"Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Clement Delangue, Anthony Moi, Pierric Cistac, Tim Rault, R\u00e9mi Louf, Morgan Funtowicz, Joe Davison, Sam Shleifer, Patrick von Platen, Clara Ma, Yacine Jernite, Julien Plu, Canwen Xu, Teven\u00a0Le Scao, Sylvain Gugger, Mariama Drame, Quentin Lhoest, and Alexander\u00a0M. Rush. 2020. Transformers: State-of-the-Art Natural Language Processing. In EMNLP. Association for Computational Linguistics, Online, 38\u201345. https:\/\/www.aclweb.org\/anthology\/2020.emnlp-demos.6"},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"crossref","unstructured":"Shu Wu Yuyuan Tang Yanqiao Zhu Liang Wang Xing Xie and Tieniu Tan. 2019. Session-based recommendation with graph neural networks. In AAAI Vol.\u00a033. 346\u2013353.","DOI":"10.1609\/aaai.v33i01.3301346"},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"crossref","unstructured":"Wenhui Yu Xiao Lin Junfeng Ge Wenwu Ou and Zheng Qin. 2020. Semi-supervised collaborative filtering by text-enhanced domain adaptation. In KDD. 2136\u20132144.","DOI":"10.1145\/3394486.3403264"},{"key":"e_1_3_2_1_49_1","volume-title":"Actions speak louder than words: Trillion-parameter sequential transducers for generative recommendations. arXiv preprint arXiv:2402.17152","author":"Zhai Jiaqi","year":"2024","unstructured":"Jiaqi Zhai, Lucy Liao, Xing Liu, Yueming Wang, Rui Li, Xuan Cao, Leon Gao, Zhaojie Gong, Fangda Gu, Michael He, 2024. Actions speak louder than words: Trillion-parameter sequential transducers for generative recommendations. arXiv preprint arXiv:2402.17152 (2024)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"crossref","unstructured":"Xiaohua Zhai Alexander Kolesnikov Neil Houlsby and Lucas Beyer. 2022. Scaling vision transformers. In CVPR. 12104\u201312113.","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"e_1_3_2_1_51_1","volume-title":"Recbole: Towards a unified, comprehensive and efficient framework for recommendation algorithms. In CIKM. 4653\u20134664.","author":"Zhao Wayne\u00a0Xin","year":"2021","unstructured":"Wayne\u00a0Xin Zhao, Shanlei Mu, Yupeng Hou, Zihan Lin, Yushuo Chen, Xingyu Pan, Kaiyuan Li, Yujie Lu, Hui Wang, Changxin Tian, 2021. Recbole: Towards a unified, comprehensive and efficient framework for recommendation algorithms. In CIKM. 4653\u20134664."},{"key":"e_1_3_2_1_52_1","volume-title":"A survey of large language models. arXiv:2303.18223","author":"Zhao Wayne\u00a0Xin","year":"2023","unstructured":"Wayne\u00a0Xin Zhao, Kun Zhou, Junyi Li, Tianyi Tang, Xiaolei Wang, Yupeng Hou, Yingqian Min, Beichen Zhang, Junjie Zhang, Zican Dong, 2023. A survey of large language models. arXiv:2303.18223 (2023)."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"crossref","unstructured":"Kun Zhou Hui Yu Wayne\u00a0Xin Zhao and Ji-Rong Wen. 2022. Filter-enhanced MLP is all you need for sequential recommendation. In WWW. 2388\u20132399.","DOI":"10.1145\/3485447.3512111"}],"event":{"name":"RecSys '24: 18th ACM Conference on Recommender Systems","location":"Bari Italy","acronym":"RecSys '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["18th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688129","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640457.3688129","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:32Z","timestamp":1750294712000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688129"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,8]]},"references-count":52,"alternative-id":["10.1145\/3640457.3688129","10.1145\/3640457"],"URL":"https:\/\/doi.org\/10.1145\/3640457.3688129","relation":{},"subject":[],"published":{"date-parts":[[2024,10,8]]},"assertion":[{"value":"2024-10-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}