{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T01:41:43Z","timestamp":1765503703875,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":61,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,10]]},"DOI":"10.1145\/3746252.3761332","type":"proceedings-article","created":{"date-parts":[[2025,11,8]],"date-time":"2025-11-08T00:18:04Z","timestamp":1762561084000},"page":"1839-1849","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Crocodile: Cross Experts Covariance for Disentangled Learning in Multi-Domain Recommendation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-9745-7820","authenticated-orcid":false,"given":"Zhutian","family":"Lin","sequence":"first","affiliation":[{"name":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2697-7012","authenticated-orcid":false,"given":"Junwei","family":"Pan","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7723-6511","authenticated-orcid":false,"given":"Haibin","family":"Yu","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1521-9542","authenticated-orcid":false,"given":"Xi","family":"Xiao","sequence":"additional","affiliation":[{"name":"Shenzhen International Graduate School, Tsinghua University, Shenzhen, 
China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3766-0300","authenticated-orcid":false,"given":"Ximei","family":"Wang","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-1519-0227","authenticated-orcid":false,"given":"Zhixiang","family":"Feng","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-7832-0501","authenticated-orcid":false,"given":"Shifeng","family":"Wen","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6215-272X","authenticated-orcid":false,"given":"Shudong","family":"Huang","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-2973-9167","authenticated-orcid":false,"given":"Dapeng","family":"Liu","sequence":"additional","affiliation":[{"name":"Tencent Inc., Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3991-8161","authenticated-orcid":false,"given":"Lei","family":"Xiao","sequence":"additional","affiliation":[{"name":"Tencent, Shenzhen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,11,10]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Vicreg: Variance invariance-covariance regularization for self-supervised learning. arXiv preprint arXiv:2105.04906","author":"Bardes Adrien","year":"2021","unstructured":"Adrien Bardes, Jean Ponce, and Yann LeCun. 2021. Vicreg: Variance invariance-covariance regularization for self-supervised learning. 
arXiv preprint arXiv:2105.04906 (2021)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1145\/1454008.1454009"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539597.3570366"},{"key":"e_1_3_2_1_4_1","volume-title":"Multitask learning. Machine learning 28, 1","author":"Caruana Rich","year":"1997","unstructured":"Rich Caruana. 1997. Multitask learning. Machine learning 28, 1 (1997), 41--75."},{"key":"e_1_3_2_1_5_1","first-page":"1","article-title":"ReduNet: A white-box deep network from the principle of maximizing rate reduction","volume":"23","author":"Ryan Chan Kwan Ho","year":"2022","unstructured":"Kwan Ho Ryan Chan, Yaodong Yu, Chong You, Haozhi Qi, John Wright, and Yi Ma. 2022. ReduNet: A white-box deep network from the principle of maximizing rate reduction. Journal of machine learning research 23, 114 (2022), 1--103.","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599884"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557090"},{"key":"e_1_3_2_1_8_1","volume-title":"Infogan: Interpretable representation learning by information maximizing generative adversarial nets. Advances in neural information processing systems 29","author":"Chen Xi","year":"2016","unstructured":"Xi Chen, Yan Duan, Rein Houthooft, John Schulman, Ilya Sutskever, and Pieter Abbeel. 2016. Infogan: Interpretable representation learning by information maximizing generative adversarial nets. Advances in neural information processing systems 29 (2016)."},{"key":"e_1_3_2_1_9_1","volume-title":"Pong Eksombatchai, Jaewon Yang, Yi-Ping Hsu, et al.","author":"Chen Xiangyi","year":"2025","unstructured":"Xiangyi Chen, Kousik Rajesh, Matthew Lawhon, Zelun Wang, Hanyu Li, Haomiao Li, Saurabh Vishwas Joshi, Pong Eksombatchai, Jaewon Yang, Yi-Ping Hsu, et al. 2025. 
PinFM: Foundation Model for User Activity Sequences at a Billion-scale Visual Discovery Platform. arXiv preprint arXiv:2507.12704 (2025)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3522762"},{"key":"e_1_3_2_1_11_1","first-page":"34600","article-title":"On the representation collapse of sparse mixture of experts","volume":"35","author":"Chi Zewen","year":"2022","unstructured":"Zewen Chi, Li Dong, Shaohan Huang, Damai Dai, Shuming Ma, Barun Patra, Saksham Singhal, Payal Bajaj, Xia Song, Xian-Ling Mao, et al. 2022. On the representation collapse of sparse mixture of experts. Advances in Neural Information Processing Systems 35 (2022), 34600--34613.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_12_1","volume-title":"International Conference on Machine Learning. PMLR, 3015--3024","author":"Ermolov Aleksandr","year":"2021","unstructured":"Aleksandr Ermolov, Aliaksandr Siarohin, Enver Sangineto, and Nicu Sebe. 2021. Whitening for self-supervised representation learning. In International Conference on Machine Learning. PMLR, 3015--3024."},{"key":"e_1_3_2_1_13_1","first-page":"1","article-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"Fedus William","year":"2022","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity. Journal of Machine Learning Research 23, 120 (2022), 1--39.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557624"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1145\/3627673.3679615"},{"key":"e_1_3_2_1_16_1","volume-title":"On the Embedding Collapse when Scaling up Recommendation Models. 
arXiv preprint arXiv:2310.04400","author":"Guo Xingzhuo","year":"2023","unstructured":"Xingzhuo Guo, Junwei Pan, Ximei Wang, Baixu Chen, Jie Jiang, and Mingsheng Long. 2023. On the Embedding Collapse when Scaling up Recommendation Models. arXiv preprint arXiv:2310.04400 (2023)."},{"key":"e_1_3_2_1_17_1","volume-title":"On the Embedding Collapse when Scaling up Recommendation Models. arXiv preprint arXiv:2310.04400","author":"Guo Xingzhuo","year":"2023","unstructured":"Xingzhuo Guo, Junwei Pan, Ximei Wang, Baixu Chen, Jie Jiang, and Mingsheng Long. 2023. On the Embedding Collapse when Scaling up Recommendation Models. arXiv preprint arXiv:2310.04400 (2023)."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00946"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/2959100.2959134"},{"key":"e_1_3_2_1_20_1","volume-title":"Disentangled representations from non-disentangled models. arXiv preprint arXiv:2102.06204","author":"Khrulkov Valentin","year":"2021","unstructured":"Valentin Khrulkov, Leyla Mirvakhabova, Ivan Oseledets, and Artem Babenko. 2021. Disentangled representations from non-disentangled models. arXiv preprint arXiv:2102.06204 (2021)."},{"key":"e_1_3_2_1_21_1","volume-title":"Gshard: Scaling giant models with conditional computation and automatic sharding. arXiv preprint arXiv:2006.16668","author":"Lepikhin Dmitry","year":"2020","unstructured":"Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2020. Gshard: Scaling giant models with conditional computation and automatic sharding. 
arXiv preprint arXiv:2006.16668 (2020)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00846"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591944"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615137"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3701716.3715223"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3217449"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i3.16312"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531975"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.14778\/3231751.3231765"},{"key":"e_1_3_2_1_30_1","volume-title":"and Wenwu Zhu","author":"Ma Jianxin","year":"2019","unstructured":"Jianxin Ma, Chang Zhou, Peng Cui, Hongxia Yang, and Wenwu Zhu. 2019. Learning disentangled representations for recommendation. Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1145\/3209978.3210104"},{"key":"e_1_3_2_1_32_1","volume-title":"FinalMLP: An Enhanced Two-Stream MLP Model for CTR Prediction. arXiv preprint arXiv:2304.00902","author":"Mao Kelong","year":"2023","unstructured":"Kelong Mao, Jieming Zhu, Liangcai Su, Guohao Cai, Yuru Li, and Zhenhua Dong. 2023. FinalMLP: An Enhanced Two-Stream MLP Model for CTR Prediction. arXiv preprint arXiv:2304.00902 (2023)."},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614977"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614977"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1145\/3178876.3186040"},{"key":"e_1_3_2_1_36_1","volume-title":"Ad Recommendation in a Collapsed and Entangled World. 
arXiv preprint arXiv:2403.00793","author":"Pan Junwei","year":"2024","unstructured":"Junwei Pan, Wei Xue, Ximei Wang, Haibin Yu, Xun Liu, Shijie Quan, Xueming Qiu, Dapeng Liu, Lei Xiao, and Jie Jiang. 2024. Ad Recommendation in a Collapsed and Entangled World. arXiv preprint arXiv:2403.00793 (2024)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2016.0151"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2010.127"},{"key":"e_1_3_2_1_39_1","volume-title":"Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538","author":"Shazeer Noam","year":"2017","unstructured":"Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, and Jeff Dean. 2017. Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. arXiv preprint arXiv:1701.06538 (2017)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1145\/3459637.3481941"},{"key":"e_1_3_2_1_41_1","volume-title":"Tim GJ Rudner, and Yann LeCun","author":"Shwartz-Ziv Ravid","year":"2024","unstructured":"Ravid Shwartz-Ziv, Randall Balestriero, Kenji Kawaguchi, Tim GJ Rudner, and Yann LeCun. 2024. An Information Theory Perspective on Variance-Invariance-Covariance Regularization. Advances in Neural Information Processing Systems 36 (2024)."},{"key":"e_1_3_2_1_42_1","volume-title":"STEM: Unleashing the Power of Embeddings for Multi-task Recommendation. arXiv preprint arXiv:2308.13537","author":"Su Liangcai","year":"2023","unstructured":"Liangcai Su, Junwei Pan, Ximei Wang, Xi Xiao, Shijie Quan, Xihua Chen, and Jie Jiang. 2023. STEM: Unleashing the Power of Embeddings for Multi-task Recommendation. arXiv preprint arXiv:2308.13537 (2023)."},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"crossref","unstructured":"Hongyan Tang Junning Liu Ming Zhao and Xudong Gong. 2020. 
Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations. In RecSys. ACM 269--278.","DOI":"10.1145\/3383313.3412236"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3539618.3591736"},{"key":"e_1_3_2_1_45_1","volume-title":"Proceedings of the web conference","author":"Shivanna Rakesh","year":"2021","unstructured":"Ruoxi Wang, Rakesh Shivanna, Derek Cheng, Sagar Jain, Dong Lin, Lichan Hong, and Ed Chi. 2021. Dcn v2: Improved deep & cross network and practical lessons for web-scale learning to rank systems. In Proceedings of the web conference 2021. 1785--1797."},{"key":"e_1_3_2_1_46_1","volume-title":"Disentangled representation learning. arXiv preprint arXiv:2211.11695","author":"Wang Xin","year":"2022","unstructured":"Xin Wang, Hong Chen, Si'ao Tang, Zihao Wu, and Wenwu Zhu. 2022. Disentangled representation learning. arXiv preprint arXiv:2211.11695 (2022)."},{"key":"e_1_3_2_1_47_1","volume-title":"Proceedings of the AAAI Conference on Artificial Intelligence","volume":"38","author":"Pan Junwei","year":"2024","unstructured":"Ximei Wang, Junwei Pan, Xingzhuo Guo, Dapeng Liu, and Jie Jiang. 2024. Decoupled training: return of frustratingly easy multi-domain learning. In Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 38. 15644--15652."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00921"},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1145\/3511808.3557541"},{"volume-title":"Forty-second International Conference on Machine Learning.","author":"Yin Mingjia","key":"e_1_3_2_1_50_1","unstructured":"Mingjia Yin, Junwei Pan, Hao Wang, Ximei Wang, Shangyu Zhang, Jie Jiang, Defu Lian, and Enhong Chen. [n.d.]. From Feature Interaction to Feature Generation: A Generative Paradigm of CTR Prediction Models. 
In Forty-second International Conference on Machine Learning."},{"key":"e_1_3_2_1_51_1","volume-title":"MichiGAN: sampling from disentangled representations of single-cell data using generative adversarial networks. Genome biology 22, 1","author":"Yu Hengshi","year":"2021","unstructured":"Hengshi Yu and Joshua D Welch. 2021. MichiGAN: sampling from disentangled representations of single-cell data using generative adversarial networks. Genome biology 22, 1 (2021), 1--26."},{"key":"e_1_3_2_1_52_1","volume-title":"Chong You, Chaobing Song, and Yi Ma.","author":"Yu Yaodong","year":"2020","unstructured":"Yaodong Yu, Kwan Ho Ryan Chan, Chong You, Chaobing Song, and Yi Ma. 2020. Learning diverse and discriminative representations via the principle of maximal coding rate reduction. Advances in neural information processing systems 33 (2020), 9422--9434."},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1145\/3488560.3498479"},{"key":"e_1_3_2_1_54_1","unstructured":"Shangyu Zhang Shijie Quan Zhongren Wang Junwei Pan Tianqu Zhuang Bo Fu Yilong Sun Jieying Lin Jushuo Chen Xiaotian Li et al. 2025. Large Foundation Model for Ads Recommendation. arXiv preprint arXiv:2508.14948 (2025)."},{"key":"e_1_3_2_1_55_1","volume-title":"Scaling User Modeling: Large-scale Online User Representations for Ads Personalization in Meta. In Companion Proceedings of the ACM on Web Conference","author":"Zhang Wei","year":"2024","unstructured":"Wei Zhang, Dai Li, Chen Liang, Fang Zhou, Zhongke Zhang, Xuewei Wang, Ru Li, Yi Zhou, Yaning Huang, Dong Liang, et al. 2024. Scaling User Modeling: Large-scale Online User Representations for Ads Personalization in Meta. In Companion Proceedings of the ACM on Web Conference 2024. 
47--55."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1145\/3543507.3583263"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM51629.2021.00101"},{"key":"e_1_3_2_1_58_1","volume-title":"Temporal Interest Network for Click-Through Rate Prediction. The Web Conference","author":"Zhou Haolin","year":"2024","unstructured":"Haolin Zhou, Junwei Pan, Xinyi Zhou, Xihua Chen, Jie Jiang, Xiaofeng Gao, and Guihai Chen. 2024. Temporal Interest Network for Click-Through Rate Prediction. The Web Conference (2024)."},{"key":"e_1_3_2_1_59_1","volume-title":"HiNet: A Novel Multi-Scenario & Multi-Task Learning Approach with Hierarchical Information Extraction. arXiv preprint arXiv:2303.06095","author":"Zhou Jie","year":"2023","unstructured":"Jie Zhou, Xianshuai Cao, Wenhao Li, Kun Zhang, Chuan Luo, and Qian Yu. 2023. HiNet: A Novel Multi-Scenario & Multi-Task Learning Approach with Hierarchical Information Extraction. arXiv preprint arXiv:2303.06095 (2023)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.1145\/3477495.3531723"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645635"}],"event":{"name":"CIKM '25: The 34th ACM International Conference on Information and Knowledge Management","sponsor":["SIGIR ACM Special Interest Group on Information Retrieval","SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Seoul Republic of Korea","acronym":"CIKM '25"},"container-title":["Proceedings of the 34th ACM International Conference on Information and Knowledge 
Management"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746252.3761332","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,12]],"date-time":"2025-12-12T01:38:20Z","timestamp":1765503500000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746252.3761332"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,10]]},"references-count":61,"alternative-id":["10.1145\/3746252.3761332","10.1145\/3746252"],"URL":"https:\/\/doi.org\/10.1145\/3746252.3761332","relation":{},"subject":[],"published":{"date-parts":[[2025,11,10]]},"assertion":[{"value":"2025-11-10","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}