{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T09:10:28Z","timestamp":1765357828374,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":76,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T00:00:00Z","timestamp":1745280000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"DOI":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["No.62176043, No.62072077, and No.U22A2097"],"award-info":[{"award-number":["No.62176043, No.62072077, and No.U22A2097"]}],"id":[{"id":"10.13039\/https:\/\/doi.org\/10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,4,28]]},"DOI":"10.1145\/3696410.3714560","type":"proceedings-article","created":{"date-parts":[[2025,4,22]],"date-time":"2025-04-22T23:08:29Z","timestamp":1745363309000},"page":"2763-2774","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":5,"title":["Biting Off More Than You Can Detect: Retrieval-Augmented Multimodal Experts for Short Video Hate Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-0876-0497","authenticated-orcid":false,"given":"Jian","family":"Lang","sequence":"first","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-4977-1657","authenticated-orcid":false,"given":"Rongpei","family":"Hong","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6644-8217","authenticated-orcid":false,"given":"Jin","family":"Xu","sequence":"additional","affiliation":[{"name":"Maynooth University, Maynooth, County Kildare, Ireland"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-9865-6930","authenticated-orcid":false,"given":"Yili","family":"Li","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6415-7558","authenticated-orcid":false,"given":"Xovee","family":"Xu","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8038-8150","authenticated-orcid":false,"given":"Fan","family":"Zhou","sequence":"additional","affiliation":[{"name":"University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]}],"member":"320","published-online":{"date-parts":[[2025,4,22]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.622"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.5267\/j.ijdns.2021.6.013"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"e_1_3_2_1_4_1","article-title":"MFCC based hybrid fingerprinting method for audio classification through LSTM","volume":"12","author":"Banuroopa Kalyanaswamy","year":"2021","unstructured":"Kalyanaswamy Banuroopa and D Shanmuga Priyaa. 2021. MFCC based hybrid fingerprinting method for audio classification through LSTM. International Journal of Nonlinear Analysis and Applications, Vol. 12, Special Issue (2021), 2125--2136.","journal-title":"International Journal of Nonlinear Analysis and Applications"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3614978"},{"key":"e_1_3_2_1_6_1","volume-title":"Uses and gratifications sought by pre-adolescent and adolescent TikTok consumers. Young consumers","author":"Bossen Christina Bucknell","year":"2020","unstructured":"Christina Bucknell Bossen and Rita Kottasz. 2020. Uses and gratifications sought by pre-adolescent and adolescent TikTok consumers. Young consumers, Vol. 21, 4 (2020), 463--478."},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612498"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2022.emnlp-main.22"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3648145"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645675"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01065"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3637528.3672041"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TKDE.2024.3465241"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v17i1.22209"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1609\/icwsm.v11i1.14955"},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"crossref","unstructured":"S. Davis and P. Mermelstein. 1980. Comparison of parametric representations for monosyllabic word recognition in continuously spoken sentences. IEEE Transactions on Acoustics Speech and Signal Processing (1980).","DOI":"10.1109\/TASSP.1980.1163420"},{"key":"e_1_3_2_1_17_1","volume-title":"Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT). 4171--4186","author":"Devlin Jacob","year":"2019","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. In Proceedings of the Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT). 4171--4186."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00957"},{"key":"e_1_3_2_1_19_1","first-page":"4065","article-title":"Dual encoding for video retrieval by text","volume":"44","author":"Dong Jianfeng","year":"2021","unstructured":"Jianfeng Dong, Xirong Li, Chaoxi Xu, Xun Yang, Gang Yang, Xun Wang, and Meng Wang. 2021. Dual encoding for video retrieval by text. IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 44, 8 (2021), 4065--4080.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"e_1_3_2_1_20_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Dosovitskiy Alexey","year":"2020","unstructured":"Alexey Dosovitskiy. 2020. An image is worth 16x16 words: Transformers for image recognition at scale. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01763"},{"key":"e_1_3_2_1_22_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Eigen David","year":"2013","unstructured":"David Eigen, Marc'Aurelio Ranzato, and Ilya Sutskever. 2013. Learning factored representations in a deep mixture of experts. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_23_1","volume-title":"VSE: Improving Visual-Semantic Embeddings with Hard Negatives. In British Machine Vision Conference (BMVC). 12","author":"Faghri Fartash","year":"2018","unstructured":"Fartash Faghri, David J. Fleet, Jamie Ryan Kiros, and Sanja Fidler. 2018. VSE: Improving Visual-Semantic Embeddings with Hard Negatives. In British Machine Vision Conference (BMVC). 12."},{"key":"e_1_3_2_1_24_1","first-page":"1","article-title":"Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity","volume":"23","author":"Fedus William","year":"2022","unstructured":"William Fedus, Barret Zoph, and Noam Shazeer. 2022. Switch transformers: Scaling to trillion parameter models with simple and efficient sparsity. Journal of Machine Learning Research, Vol. 23, 120 (2022), 1--39.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1145\/3232676"},{"key":"e_1_3_2_1_26_1","volume-title":"Advances in Neural Information Processing Systems (Neurips)","volume":"26","author":"Frome Andrea","year":"2013","unstructured":"Andrea Frome, Greg S Corrado, Jon Shlens, Samy Bengio, Jeff Dean, Marc'Aurelio Ranzato, and Tomas Mikolov. 2013. Devise: A deep visual-semantic embedding model. Advances in Neural Information Processing Systems (Neurips), Vol. 26 (2013)."},{"volume-title":"Proceedings of the ACM International Conference on Multimedia (MM).","author":"Gao Zixian","key":"e_1_3_2_1_27_1","unstructured":"Zixian Gao, Disen Hu, Xun Jiang, Huimin Lu, Heng Tao Shen, and Xing Xu. [n.,d.]. Enhanced Experts with Uncertainty-Aware Routing for Multimodal Sentiment Analysis. In Proceedings of the ACM International Conference on Multimedia (MM)."},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1080\/02640410600946860"},{"key":"e_1_3_2_1_29_1","unstructured":"Douglas R Hofstadter. 1995. Fluid concepts and creative analogies: Computer models of the fundamental mechanisms of thought.Basic books."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01278"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.79"},{"key":"e_1_3_2_1_32_1","volume-title":"Social media engagement: What motivates user participation and consumption on YouTube? Computers in human behavior","author":"Khan M Laeeq","year":"2017","unstructured":"M Laeeq Khan. 2017. Social media engagement: What motivates user participation and consumption on YouTube? Computers in human behavior, Vol. 66 (2017), 236--247."},{"volume-title":"Advances in Neural Information Processing Systems (Neurips)","author":"Kiela Douwe","key":"e_1_3_2_1_33_1","unstructured":"Douwe Kiela, Hamed Firooz, Aravind Mohan, Vedanuj Goswami, Amanpreet Singh, Pratik Ringshia, and Davide Testuggine. 2020. The Hateful Memes Challenge: Detecting Hate Speech in Multimodal Memes. In Advances in Neural Information Processing Systems (Neurips), Vol. abs\/2005.04790."},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v27i1.8539"},{"key":"e_1_3_2_1_35_1","volume-title":"Retrieval-Augmented Dynamic Prompt Tuning for Incomplete Multimodal Learning. arXiv preprint arXiv:2501.01120","author":"Lang Jian","year":"2025","unstructured":"Jian Lang, Zhangtao Cheng, Ting Zhong, and Fan Zhou. 2025. Retrieval-Augmented Dynamic Prompt Tuning for Incomplete Multimodal Learning. arXiv preprint arXiv:2501.01120 (2025)."},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"e_1_3_2_1_37_1","volume-title":"GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. In International Conference on Learning Representations (ICLR).","author":"Lepikhin Dmitry","year":"2020","unstructured":"Dmitry Lepikhin, HyoukJoong Lee, Yuanzhong Xu, Dehao Chen, Orhan Firat, Yanping Huang, Maxim Krikun, Noam Shazeer, and Zhifeng Chen. 2020. GShard: Scaling Giant Models with Conditional Computation and Automatic Sharding. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_38_1","volume-title":"LLaVA-OneVision: Easy Visual Task Transfer. arXiv preprint arXiv:2408.03326","author":"Li Bo","year":"2024","unstructured":"Bo Li, Yuanhan Zhang, Dong Guo, Renrui Zhang, Feng Li, Hao Zhang, Kaichen Zhang, Yanwei Li, Ziwei Liu, and Chunyuan Li. 2024. LLaVA-OneVision: Easy Visual Task Transfer. arXiv preprint arXiv:2408.03326 (2024)."},{"key":"e_1_3_2_1_39_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.202"},{"key":"e_1_3_2_1_40_1","volume-title":"Proceedings of the ACM International Conference on Multimedia (MM). 1786--1794","author":"Li Xirong","year":"2019","unstructured":"Xirong Li, Chaoxi Xu, Gang Yang, Zhineng Chen, and Jianfeng Dong. 2019. W2vv fully deep learning for ad-hoc video search. In Proceedings of the ACM International Conference on Multimedia (MM). 1786--1794."},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645381"},{"key":"e_1_3_2_1_42_1","volume-title":"International Conference on Learning Representations (ICLR).","author":"Loshchilov Ilya","year":"2018","unstructured":"Ilya Loshchilov and Frank Hutter. 2018. Decoupled weight decay regularization. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_43_1","volume-title":"Cross-token modeling with conditional computation. arXiv preprint arXiv:2109.02008","author":"Lou Yuxuan","year":"2021","unstructured":"Yuxuan Lou, Fuzhao Xue, Zangwei Zheng, and Yang You. 2021. Cross-token modeling with conditional computation. arXiv preprint arXiv:2109.02008 (2021)."},{"key":"e_1_3_2_1_44_1","volume-title":"Advances in Neural Information Processing Systems (Neurips)","volume":"32","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in Neural Information Processing Systems (Neurips), Vol. 32 (2019)."},{"key":"e_1_3_2_1_45_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0221152"},{"key":"e_1_3_2_1_46_1","volume-title":"Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL). 5333--5347","author":"Mei Jingbiao","year":"2023","unstructured":"Jingbiao Mei, Jinghong Chen, Weizhe Lin, Bill Byrne, and Marcus Tomalin. 2023. Improving Hateful Meme Detection through Retrieval-Guided Contrastive Learning. In Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL). 5333--5347."},{"key":"e_1_3_2_1_47_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.291"},{"volume-title":"Complex Networks and Their Applications VIII","author":"Mozafari Marzieh","key":"e_1_3_2_1_48_1","unstructured":"Marzieh Mozafari, Reza Farahbakhsh, and Noel Crespi. 2020a. A BERT-based transfer learning approach for hate speech detection in online social media. In Complex Networks and Their Applications VIII: Volume 1 Proceedings of the Eighth International Conference on Complex Networks and Their Applications COMPLEX NETWORKS. Springer, 928--940."},{"key":"e_1_3_2_1_49_1","doi-asserted-by":"publisher","DOI":"10.1371\/journal.pone.0237861"},{"key":"e_1_3_2_1_50_1","first-page":"9564","article-title":"Multimodal contrastive learning with limoe: the language-image mixture of experts","volume":"35","author":"Mustafa Basil","year":"2022","unstructured":"Basil Mustafa, Carlos Riquelme, Joan Puigcerver, Rodolphe Jenatton, and Neil Houlsby. 2022. Multimodal contrastive learning with limoe: the language-image mixture of experts. Advances in Neural Information Processing Systems (Neurips), Vol. 35 (2022), 9564--9576.","journal-title":"Advances in Neural Information Processing Systems (Neurips)"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.232"},{"key":"e_1_3_2_1_52_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19797-0_3"},{"key":"e_1_3_2_1_53_1","volume-title":"International Conference on Machine Learning (ICML). PMLR, 8748--8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al. 2021. Learning transferable visual models from natural language supervision. In International Conference on Machine Learning (ICML). PMLR, 8748--8763."},{"key":"e_1_3_2_1_54_1","volume-title":"International Conference on Machine Learning (ICML)","volume":"202","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust Speech Recognition via Large-Scale Weak Supervision. In International Conference on Machine Learning (ICML), Vol. 202. PMLR, 28492--28518."},{"key":"e_1_3_2_1_55_1","volume-title":"Depthwise Separable Convolution and Self Attention","author":"Ramadhani Kurniawan Nur","year":"2024","unstructured":"Kurniawan Nur Ramadhani, Rinaldi Munir, and Nugraha Priya Utama. 2024. Improving Video Vision Transformer for Deepfake Video Detection using Facial Landmark, Depthwise Separable Convolution and Self Attention. IEEE Access (2024)."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2577031"},{"key":"e_1_3_2_1_57_1","first-page":"8583","article-title":"Scaling vision with sparse mixture of experts","volume":"34","author":"Riquelme Carlos","year":"2021","unstructured":"Carlos Riquelme, Joan Puigcerver, Basil Mustafa, Maxim Neumann, Rodolphe Jenatton, Andr\u00e9 Susano Pinto, Daniel Keysers, and Neil Houlsby. 2021. Scaling vision with sparse mixture of experts. Advances in Neural Information Processing Systems (Neurips), Vol. 34 (2021), 8583--8595.","journal-title":"Advances in Neural Information Processing Systems (Neurips)"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W17-1101"},{"key":"e_1_3_2_1_59_1","volume-title":"Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. In International Conference on Learning Representations (ICLR).","author":"Shazeer Noam","year":"2016","unstructured":"Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz, Andy Davis, Quoc Le, Geoffrey Hinton, and Jeff Dean. 2016. Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer. In International Conference on Learning Representations (ICLR)."},{"key":"e_1_3_2_1_60_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.findings-emnlp.758"},{"key":"e_1_3_2_1_61_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.312"},{"key":"e_1_3_2_1_62_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCCNT51525.2021.9579881"},{"key":"e_1_3_2_1_63_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681521"},{"key":"e_1_3_2_1_64_1","volume-title":"Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at Any Resolution. arXiv","author":"Wang Peng","year":"2024","unstructured":"Peng Wang, Shuai Bai, Sinan Tan, Shijie Wang, Zhihao Fan, Jinze Bai, Ke-Yang Chen, Xuejing Liu, Jialin Wang, Wenbin Ge, Yang Fan, Kai Dang, Mengfei Du, Xuancheng Ren, Rui Men, Dayiheng Liu, Chang Zhou, Jingren Zhou, and Junyang Lin. 2024a. Qwen2-VL: Enhancing Vision-Language Model's Perception of the World at Any Resolution. arXiv (2024)."},{"key":"e_1_3_2_1_65_1","doi-asserted-by":"publisher","DOI":"10.5555\/2390374.2390377"},{"key":"e_1_3_2_1_66_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645334"},{"key":"e_1_3_2_1_67_1","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645630"},{"key":"e_1_3_2_1_68_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681683"},{"key":"e_1_3_2_1_69_1","volume-title":"Proceedings of the International Conference on Information and Social Science (ICISS). 12--14","author":"Yang Shuai","year":"2019","unstructured":"Shuai Yang, Yuzhen Zhao, and Yifang Ma. 2019. Analysis of the reasons and development of short video application-Taking Tik Tok as an example. In Proceedings of the International Conference on Information and Social Science (ICISS). 12--14."},{"key":"e_1_3_2_1_70_1","volume-title":"Minicpm-v: A gpt-4v level mllm on your phone. arXiv preprint arXiv:2408.01800","author":"Yao Yuan","year":"2024","unstructured":"Yuan Yao, Tianyu Yu, Ao Zhang, Chongyi Wang, Junbo Cui, Hongji Zhu, Tianchi Cai, Haoyu Li, Weilin Zhao, Zhihui He, et al. 2024. Minicpm-v: A gpt-4v level mllm on your phone. arXiv preprint arXiv:2408.01800 (2024)."},{"key":"e_1_3_2_1_71_1","volume-title":"Ruslan Salakhutdinov, and Louis-Philippe Morency.","author":"Yu Haofei","year":"2023","unstructured":"Haofei Yu, Paul Pu Liang, Ruslan Salakhutdinov, and Louis-Philippe Morency. 2023. MMOE: Mixture of Multimodal Interaction Experts. arXiv preprint arXiv:2311.09580 (2023)."},{"key":"e_1_3_2_1_72_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2021.3109102"},{"key":"e_1_3_2_1_73_1","volume-title":"Multi-gate Mixture-of-Contrastive-Experts with Graph-based Gating Mechanism for TV Recommendation. In International Conference on Information and Knowledge Management (CIKM). 4938--4944","author":"Zhang Cong","year":"2023","unstructured":"Cong Zhang, Dongyang Liu, Lin Zuo, Junlan Feng, Chao Deng, Jian Sun, Haitao Zeng, and Yaohong Zhao. 2023. Multi-gate Mixture-of-Contrastive-Experts with Graph-based Gating Mechanism for TV Recommendation. In International Conference on Information and Knowledge Management (CIKM). 4938--4944."},{"key":"e_1_3_2_1_74_1","doi-asserted-by":"publisher","DOI":"10.1145\/3626772.3657929"},{"key":"e_1_3_2_1_75_1","doi-asserted-by":"publisher","DOI":"10.1145\/3433000"},{"key":"e_1_3_2_1_76_1","volume-title":"Is Sora a World Simulator? A Comprehensive Survey on General World Models and Beyond. arXiv","author":"Zhu Zheng","year":"2024","unstructured":"Zheng Zhu, Xiaofeng Wang, Wangbo Zhao, Chen Min, Nianchen Deng, Min Dou, Yuqi Wang, Botian Shi, Kai Wang, Chi Zhang, Yang You, Zhaoxiang Zhang, Dawei Zhao, Liang Xiao, Jian Zhao, Jiwen Lu, and Guan Huang. 2024. Is Sora a World Simulator? A Comprehensive Survey on General World Models and Beyond. arXiv, Vol. abs\/2405.03520 (2024)."}],"event":{"name":"WWW '25: The ACM Web Conference 2025","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"],"location":"Sydney NSW Australia","acronym":"WWW '25"},"container-title":["Proceedings of the ACM on Web Conference 2025"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714560","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696410.3714560","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:33Z","timestamp":1750295913000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696410.3714560"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,22]]},"references-count":76,"alternative-id":["10.1145\/3696410.3714560","10.1145\/3696410"],"URL":"https:\/\/doi.org\/10.1145\/3696410.3714560","relation":{},"subject":[],"published":{"date-parts":[[2025,4,22]]},"assertion":[{"value":"2025-04-22","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}