{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T01:45:28Z","timestamp":1773193528157,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":12,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T00:00:00Z","timestamp":1728345600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,8]]},"DOI":"10.1145\/3640457.3688040","type":"proceedings-article","created":{"date-parts":[[2024,10,8]],"date-time":"2024-10-08T15:39:28Z","timestamp":1728401968000},"page":"778-780","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Enhancing Performance and Scalability of Large-Scale Recommendation Systems with Jagged Flash Attention"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5230-5530","authenticated-orcid":false,"given":"Rengan","family":"Xu","sequence":"first","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0603-4482","authenticated-orcid":false,"given":"Junjie","family":"Yang","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-1918-2471","authenticated-orcid":false,"given":"Yifan","family":"Xu","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8299-2328","authenticated-orcid":false,"given":"Hong","family":"Li","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-0172-8698","authenticated-orcid":false,"given":"Xing","family":"Liu","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7705-9630","authenticated-orcid":false,"given":"Devashish","family":"Shankar","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9986-1091","authenticated-orcid":false,"given":"Haoci","family":"Zhang","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8189-1193","authenticated-orcid":false,"given":"Meng","family":"Liu","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6870-787X","authenticated-orcid":false,"given":"Boyang","family":"Li","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-6608-8441","authenticated-orcid":false,"given":"Yuxi","family":"Hu","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-5745-1580","authenticated-orcid":false,"given":"Mingwei","family":"Tang","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0725-9353","authenticated-orcid":false,"given":"Zehua","family":"Zhang","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9590-9433","authenticated-orcid":false,"given":"Tunhou","family":"Zhang","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-6645-5620","authenticated-orcid":false,"given":"Dai","family":"Li","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0862-7923","authenticated-orcid":false,"given":"Sijia","family":"Chen","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-7824-050X","authenticated-orcid":false,"given":"Gian-Paolo","family":"Musumeci","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-7279-3318","authenticated-orcid":false,"given":"Jiaqi","family":"Zhai","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1646-6635","authenticated-orcid":false,"given":"Bill","family":"Zhu","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-8717-9508","authenticated-orcid":false,"given":"Hong","family":"Yan","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-1504-9013","authenticated-orcid":false,"given":"Srihari","family":"Reddy","sequence":"additional","affiliation":[{"name":"Meta Platforms, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,8]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Flashattention-2: Faster attention with better parallelism and work partitioning. arXiv preprint arXiv:2307.08691","author":"Dao Tri","year":"2023","unstructured":"Tri Dao. 2023. Flashattention-2: Faster attention with better parallelism and work partitioning. arXiv preprint arXiv:2307.08691 (2023)."},{"key":"e_1_3_2_1_2_1","volume-title":"Advances in Neural Information Processing Systems, S.\u00a0Koyejo, S.\u00a0Mohamed, A.\u00a0Agarwal, D.\u00a0Belgrave, K.\u00a0Cho, and A.\u00a0Oh (Eds.). Vol.\u00a035. Curran Associates","author":"Dao Tri","year":"2022","unstructured":"Tri Dao, Dan Fu, Stefano Ermon, Atri Rudra, and Christopher R\u00e9. 2022. FlashAttention: Fast and Memory-Efficient Exact Attention with IO-Awareness. In Advances in Neural Information Processing Systems, S.\u00a0Koyejo, S.\u00a0Mohamed, A.\u00a0Agarwal, D.\u00a0Belgrave, K.\u00a0Cho, and A.\u00a0Oh (Eds.). Vol.\u00a035. Curran Associates, Inc., 16344\u201316359. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/67d57c32e20fd0a7a302cb81d36e40d5-Paper-Conference.pdf"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3523227.3547387"},{"key":"e_1_3_2_1_4_1","volume-title":"Online normalizer calculation for softmax. arXiv preprint arXiv:1805.02867","author":"Milakov Maxim","year":"2018","unstructured":"Maxim Milakov and Natalia Gimelshein. 2018. Online normalizer calculation for softmax. arXiv preprint arXiv:1805.02867 (2018)."},{"key":"e_1_3_2_1_5_1","volume-title":"Deep learning recommendation model for personalization and recommendation systems. arXiv preprint arXiv:1906.00091","author":"Naumov Maxim","year":"2019","unstructured":"Maxim Naumov, Dheevatsa Mudigere, Hao-Jun\u00a0Michael Shi, Jianyu Huang, Narayanan Sundaraman, Jongsoo Park, Xiaodong Wang, Udit Gupta, Carole-Jean Wu, Alisson\u00a0G Azzolini, 2019. Deep learning recommendation model for personalization and recommendation systems. arXiv preprint arXiv:1906.00091 (2019)."},{"key":"e_1_3_2_1_6_1","volume-title":"Async Learned User Embeddings for Ads Delivery Optimization. arXiv preprint arXiv:2406.05898","author":"Tang Mingwei","year":"2024","unstructured":"Mingwei Tang, Meng Liu, Hong Li, Junjie Yang, Chenglin Wei, Boyang Li, Dai Li, Rengan Xu, Yifan Xu, Zehua Zhang, 2024. Async Learned User Embeddings for Ads Delivery Optimization. arXiv preprint arXiv:2406.05898 (2024)."},{"key":"e_1_3_2_1_7_1","unstructured":"PyTorch Team. 2021. The nestedtensor package prototype:Readme.md.https:\/\/github.com\/pytorch\/nestedtensor\/blob\/master\/nestedtensor\/csrc\/README.md."},{"key":"e_1_3_2_1_8_1","unstructured":"Tensorflow Team. 2022. Ragged Tensors.https:\/\/www.tensorflow.org\/api_docs\/python\/tf\/RaggedTensor?version=nightly."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_2_1_10_1","volume-title":"Actions Speak Louder than Words: Trillion-Parameter Sequential Transducers for Generative Recommendations. arXiv preprint arXiv:2402.17152","author":"Zhai Jiaqi","year":"2024","unstructured":"Jiaqi Zhai, Lucy Liao, Xing Liu, Yueming Wang, Rui Li, Xuan Cao, Leon Gao, Zhaojie Gong, Fangda Gu, Michael He, 2024. Actions Speak Louder than Words: Trillion-Parameter Sequential Transducers for Generative Recommendations. arXiv preprint arXiv:2402.17152 (2024)."},{"key":"e_1_3_2_1_11_1","volume-title":"Wukong: Towards a Scaling Law for Large-Scale Recommendation. arXiv preprint arXiv:2403.02545","author":"Zhang Buyun","year":"2024","unstructured":"Buyun Zhang, Liang Luo, Yuxin Chen, Jade Nie, Xi Liu, Daifeng Guo, Yanli Zhao, Shen Li, Yuchen Hao, Yantao Yao, 2024. Wukong: Towards a Scaling Law for Large-Scale Recommendation. arXiv preprint arXiv:2403.02545 (2024)."},{"key":"e_1_3_2_1_12_1","volume-title":"DHEN: A deep and hierarchical ensemble network for large-scale click-through rate prediction. arXiv preprint arXiv:2203.11014","author":"Zhang Buyun","year":"2022","unstructured":"Buyun Zhang, Liang Luo, Xi Liu, Jay Li, Zeliang Chen, Weilin Zhang, Xiaohan Wei, Yuchen Hao, Michael Tsang, Wenjun Wang, 2022. DHEN: A deep and hierarchical ensemble network for large-scale click-through rate prediction. arXiv preprint arXiv:2203.11014 (2022)."}],"event":{"name":"RecSys '24: 18th ACM Conference on Recommender Systems","location":"Bari Italy","acronym":"RecSys '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web","SIGAI ACM Special Interest Group on Artificial Intelligence","SIGKDD ACM Special Interest Group on Knowledge Discovery in Data","SIGIR ACM Special Interest Group on Information Retrieval","SIGCHI ACM Special Interest Group on Computer-Human Interaction"]},"container-title":["18th ACM Conference on Recommender Systems"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688040","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3640457.3688040","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T00:58:29Z","timestamp":1750294709000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3640457.3688040"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,8]]},"references-count":12,"alternative-id":["10.1145\/3640457.3688040","10.1145\/3640457"],"URL":"https:\/\/doi.org\/10.1145\/3640457.3688040","relation":{},"subject":[],"published":{"date-parts":[[2024,10,8]]},"assertion":[{"value":"2024-10-08","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}