{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,13]],"date-time":"2026-04-13T15:59:21Z","timestamp":1776095961344,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":47,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,5,13]],"date-time":"2024-05-13T00:00:00Z","timestamp":1715558400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"open project of Sichuan Provincial Key Laboratory of Philosophy, the Social Science for Language Intelligence in Special Education","award":["YYZN-2023-1"],"award-info":[{"award-number":["YYZN-2023-1"]}]},{"DOI":"10.13039\/501100006374","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62176187"],"award-info":[{"award-number":["62176187"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100006374","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2022YFB3103602"],"award-info":[{"award-number":["2022YFB3103602"]}],"id":[{"id":"10.13039\/501100006374","id-type":"DOI","asserted-by":"publisher"}]},{"name":"CCF-Baidu Open Fund"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,5,13]]},"DOI":"10.1145\/3589334.3645677","type":"proceedings-article","created":{"date-parts":[[2024,5,8]],"date-time":"2024-05-08T07:08:13Z","timestamp":1715152093000},"page":"4395-4406","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["MMLSCU: A Dataset for Multi-modal Multi-domain Live Streaming Comment Understanding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-7379-7478","authenticated-orcid":false,"given":"Zixiang","family":"Meng","sequence":"first","affiliation":[{"name":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-0530-9297","authenticated-orcid":false,"given":"Qiang","family":"Gao","sequence":"additional","affiliation":[{"name":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-3092-6333","authenticated-orcid":false,"given":"Di","family":"Guo","sequence":"additional","affiliation":[{"name":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5068-050X","authenticated-orcid":false,"given":"Yunlong","family":"Li","sequence":"additional","affiliation":[{"name":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0513-5540","authenticated-orcid":false,"given":"Bobo","family":"Li","sequence":"additional","affiliation":[{"name":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3026-6347","authenticated-orcid":false,"given":"Hao","family":"Fei","sequence":"additional","affiliation":[{"name":"National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6192-1194","authenticated-orcid":false,"given":"Shengqiong","family":"Wu","sequence":"additional","affiliation":[{"name":"Sea-NExT Joint Lab, National University of Singapore, Singapore, Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1816-1761","authenticated-orcid":false,"given":"Fei","family":"Li","sequence":"additional","affiliation":[{"name":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-6543-2548","authenticated-orcid":false,"given":"Chong","family":"Teng","sequence":"additional","affiliation":[{"name":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9613-5927","authenticated-orcid":false,"given":"Donghong","family":"Ji","sequence":"additional","affiliation":[{"name":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, Wuhan, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,5,13]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIoT48696.2020.9089607"},{"key":"e_1_3_2_2_2_1","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65--72","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65--72."},{"key":"e_1_3_2_2_3_1","volume-title":"E-Sports Talent Scouting Based on Multimodal Twitch Stream Data. CoRR","author":"Belova Anna","year":"2019","unstructured":"Anna Belova, Wen He, and Ziyi Zhong. 2019. E-Sports Talent Scouting Based on Multimodal Twitch Stream Data. CoRR, Vol. abs\/1907.01615 (2019). showeprint[arXiv]1907.01615 http:\/\/arxiv.org\/abs\/1907.01615"},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3210825.3210833"},{"key":"e_1_3_2_2_5_1","unstructured":"Michael Bratman. 1987. Intention plans and practical reason. (1987)."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"crossref","unstructured":"Jieting Chen Junkai Ding Wenping Chen and Qin Jin. 2023. Knowledge Enhanced Model for Live Video Comment Generation. arxiv: 2304.14657 [cs.CV]","DOI":"10.1109\/ICME55011.2023.00387"},{"key":"e_1_3_2_2_7_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_8_1","volume-title":"EVA: Exploring the Limits of Masked Visual Representation Learning at Scale. arxiv: 2211.07636 [cs.CV]","author":"Fang Yuxin","year":"2022","unstructured":"Yuxin Fang, Wen Wang, Binhui Xie, Quan Sun, Ledell Wu, Xinggang Wang, Tiejun Huang, Xinlong Wang, and Yue Cao. 2022. EVA: Exploring the Limits of Masked Visual Representation Learning at Scale. arxiv: 2211.07636 [cs.CV]"},{"key":"e_1_3_2_2_9_1","volume-title":"Armand Joulin, and Ishan Misra.","author":"Girdhar Rohit","year":"2023","unstructured":"Rohit Girdhar, Alaaeldin El-Nouby, Zhuang Liu, Mannat Singh, Kalyan Vasudev Alwala, Armand Joulin, and Ishan Misra. 2023. ImageBind: One Embedding Space To Bind Them All. arxiv: 2305.05665 [cs.CV]"},{"key":"e_1_3_2_2_10_1","volume-title":"Macro-average: rare types are important too. arXiv preprint arXiv:2104.05700","author":"Gowda Thamme","year":"2021","unstructured":"Thamme Gowda, Weiqiu You, Constantine Lignos, and Jonathan May. 2021. Macro-average: rare types are important too. arXiv preprint arXiv:2104.05700 (2021)."},{"key":"e_1_3_2_2_11_1","volume-title":"large minibatch sgd: Training imagenet in 1 hour. arXiv preprint arXiv:1706.02677","author":"Goyal Priya","year":"2017","unstructured":"Priya Goyal, Piotr Doll\u00e1r, Ross Girshick, Pieter Noordhuis, Lukasz Wesolowski, Aapo Kyrola, Andrew Tulloch, Yangqing Jia, and Kaiming He. 2017. Accurate, large minibatch sgd: Training imagenet in 1 hour. arXiv preprint arXiv:1706.02677 (2017)."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.02039"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413678"},{"key":"e_1_3_2_2_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_15_1","first-page":"1","article-title":"Exploring the emerging type of comment for online videos: Danmu","volume":"12","author":"He Ming","year":"2017","unstructured":"Ming He, Yong Ge, Enhong Chen, Qi Liu, and Xuesong Wang. 2017. Exploring the emerging type of comment for online videos: Danmu. ACM Transactions on the Web (TWEB), Vol. 12, 1 (2017), 1--33.","journal-title":"ACM Transactions on the Web (TWEB)"},{"key":"e_1_3_2_2_16_1","volume-title":"Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685","author":"Hu Edward J","year":"2021","unstructured":"Edward J Hu, Yelong Shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2021. Lora: Low-rank adaptation of large language models. arXiv preprint arXiv:2106.09685 (2021)."},{"key":"e_1_3_2_2_17_1","volume-title":"Sentence Punctuation for Collaborative Commentary Generation in Esports Live-Streaming. In 2022 IEEE International Conference on Consumer Electronics (ICCE). IEEE, 1--2.","author":"Huang Hong","year":"2022","unstructured":"Hong Huang, Junjie H Xu, Xiaoling Ling, and Pujana Paliyawan. 2022. Sentence Punctuation for Collaborative Commentary Generation in Esports Live-Streaming. In 2022 IEEE International Conference on Consumer Electronics (ICCE). IEEE, 1--2."},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.inlg-1.11"},{"key":"e_1_3_2_2_19_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1145\/3391614.3393659"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3338286.3340144"},{"key":"e_1_3_2_2_22_1","unstructured":"Junnan Li Dongxu Li Silvio Savarese and Steven Hoi. 2023. BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models. arxiv: 2301.12597 [cs.CV]"},{"key":"e_1_3_2_2_23_1","volume-title":"Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81.","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text summarization branches out. 74--81."},{"key":"e_1_3_2_2_24_1","volume-title":"Yejin Choi, and Hannaneh Hajishirzi.","author":"Liu Jiacheng","year":"2021","unstructured":"Jiacheng Liu, Alisa Liu, Ximing Lu, Sean Welleck, Peter West, Ronan Le Bras, Yejin Choi, and Hannaneh Hajishirzi. 2021. Generated knowledge prompting for commonsense reasoning. arXiv preprint arXiv:2110.08387 (2021)."},{"key":"e_1_3_2_2_25_1","volume-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems","author":"Lu Jiasen","year":"2019","unstructured":"Jiasen Lu, Dhruv Batra, Devi Parikh, and Stefan Lee. 2019. Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1145\/3173574.3174040"},{"key":"e_1_3_2_2_27_1","volume-title":"UniVL: A Unified Video and Language Pre-Training Model for Multimodal Understanding and Generation. arxiv","author":"Luo Huaishao","year":"2002","unstructured":"Huaishao Luo, Lei Ji, Botian Shi, Haoyang Huang, Nan Duan, Tianrui Li, Jason Li, Taroon Bharti, and Ming Zhou. 2020. UniVL: A Unified Video and Language Pre-Training Model for Multimodal Understanding and Generation. arxiv: 2002.06353 [cs.CV]"},{"key":"e_1_3_2_2_28_1","volume-title":"LiveBot: Generating Live Video Comments Based on Visual and Textual Contexts. arxiv","author":"Ma Shuming","year":"1809","unstructured":"Shuming Ma, Lei Cui, Damai Dai, Furu Wei, and Xu Sun. 2018. LiveBot: Generating Live Video Comments Based on Visual and Textual Contexts. arxiv: 1809.04938 [cs.CL]"},{"key":"e_1_3_2_2_29_1","first-page":"27730","article-title":"Training language models to follow instructions with human feedback","volume":"35","author":"Ouyang Long","year":"2022","unstructured":"Long Ouyang, Jeffrey Wu, Xu Jiang, Diogo Almeida, Carroll Wainwright, Pamela Mishkin, Chong Zhang, Sandhini Agarwal, Katarina Slama, Alex Ray, et al. 2022. Training language models to follow instructions with human feedback. Advances in Neural Information Processing Systems, Vol. 35 (2022), 27730--27744.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3338286.3340120"},{"key":"e_1_3_2_2_31_1","volume-title":"Prompting contrastive explanations for commonsense reasoning tasks. arXiv preprint arXiv:2106.06823","author":"Paranjape Bhargavi","year":"2021","unstructured":"Bhargavi Paranjape, Julian Michael, Marjan Ghazvininejad, Luke Zettlemoyer, and Hannaneh Hajishirzi. 2021. Prompting contrastive explanations for commonsense reasoning tasks. arXiv preprint arXiv:2106.06823 (2021)."},{"key":"e_1_3_2_2_32_1","volume-title":"Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, Gretchen Krueger, and Ilya Sutskever. 2021. Learning Transferable Visual Models From Natural Language Supervision. In Proceedings of the 38th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 139), Marina Meila and Tong Zhang (Eds.). PMLR, 8748--8763. https:\/\/proceedings.mlr.press\/v139\/radford21a.html"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.5555\/3455716.3455856"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.214"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aiide.v16i1.7439"},{"key":"e_1_3_2_2_36_1","volume-title":"Collection and validation of psychophysiological data from professional and amateur players: A multimodal esports dataset. arXiv preprint arXiv:2011.00958","author":"Smerdov Anton","year":"2020","unstructured":"Anton Smerdov, Bo Zhou, Paul Lukowicz, and Andrey Somov. 2020. Collection and validation of psychophysiological data from professional and amateur players: A multimodal esports dataset. arXiv preprint arXiv:2011.00958 (2020)."},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1145\/2465958.2465971"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00513"},{"key":"e_1_3_2_2_39_1","unstructured":"Hugo Touvron Louis Martin Kevin Stone Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra Prajjwal Bhargava Shruti Bhosale et al. 2023. Llama 2: Open foundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288 (2023)."},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1656"},{"key":"e_1_3_2_2_41_1","doi-asserted-by":"publisher","DOI":"10.1145\/3394171.3413890"},{"key":"e_1_3_2_2_42_1","volume-title":"Chi, Quoc Le, and Denny Zhou","author":"Wei Jason","year":"2023","unstructured":"Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Brian Ichter, Fei Xia, Ed Chi, Quoc Le, and Denny Zhou. 2023. Chain-of-Thought Prompting Elicits Reasoning in Large Language Models. arxiv: 2201.11903 [cs.CL]"},{"key":"e_1_3_2_2_43_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2022.103254"},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ipm.2021.102687"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1145\/3576840.3578334"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"crossref","unstructured":"Hang Zhang Xin Li and Lidong Bing. 2023 a. Video-LLaMA: An Instruction-tuned Audio-Visual Language Model for Video Understanding. arxiv: 2306.02858 [cs.CL]","DOI":"10.18653\/v1\/2023.emnlp-demo.49"},{"key":"e_1_3_2_2_47_1","volume-title":"2023 b. Multimodal chain-of-thought reasoning in language models. arXiv preprint arXiv:2302.00923","author":"Zhang Zhuosheng","year":"2023","unstructured":"Zhuosheng Zhang, Aston Zhang, Mu Li, Hai Zhao, George Karypis, and Alex Smola. 2023 b. Multimodal chain-of-thought reasoning in language models. arXiv preprint arXiv:2302.00923 (2023)."}],"event":{"name":"WWW '24: The ACM Web Conference 2024","location":"Singapore Singapore","acronym":"WWW '24","sponsor":["SIGWEB ACM Special Interest Group on Hypertext, Hypermedia, and Web"]},"container-title":["Proceedings of the ACM Web Conference 2024"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645677","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3589334.3645677","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,22]],"date-time":"2025-08-22T00:30:50Z","timestamp":1755822650000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3589334.3645677"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,13]]},"references-count":47,"alternative-id":["10.1145\/3589334.3645677","10.1145\/3589334"],"URL":"https:\/\/doi.org\/10.1145\/3589334.3645677","relation":{},"subject":[],"published":{"date-parts":[[2024,5,13]]},"assertion":[{"value":"2024-05-13","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}