{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T11:35:31Z","timestamp":1773228931133,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":40,"publisher":"ACM","funder":[{"DOI":"10.13039\/501100006465","name":"Korea Creative Content Agency","doi-asserted-by":"publisher","award":["RS-2024-00441523"],"award-info":[{"award-number":["RS-2024-00441523"]}],"id":[{"id":"10.13039\/501100006465","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3728423.3759412","type":"proceedings-article","created":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T15:32:07Z","timestamp":1759937527000},"page":"31-38","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Single-anchored Multi-modal Dense Video Captioning for Esports Broadcasts Commentaries"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-9810-5301","authenticated-orcid":false,"given":"Ari","family":"Yu","sequence":"first","affiliation":[{"name":"Electronics and Telecommunications Research Institute, Daejeon, Republic of Korea and University of Science and Technology, Daejeon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6913-513X","authenticated-orcid":false,"given":"Jinwoo","family":"Hyun","sequence":"additional","affiliation":[{"name":"Institute for Basic Science, Daejeon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-1128-700X","authenticated-orcid":false,"given":"Hyeong-Gyu","family":"Jang","sequence":"additional","affiliation":[{"name":"Electronics and Telecommunications Research Institute, Daejeon, Republic of Korea and University of Science and Technology, Daejeon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-2563-6943","authenticated-orcid":false,"given":"Sung-Yun","family":"Park","sequence":"additional","affiliation":[{"name":"Electronics and Telecommunications Research Institute, Daejeon, Republic of Korea and University of Science and Technology, Daejeon, Republic of Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-3793-2608","authenticated-orcid":false,"given":"Sang-Kwang","family":"Lee","sequence":"additional","affiliation":[{"name":"Electronics and Telecommunications Research Institute, Daejeon, Republic of Korea and University of Science and Technology, Daejeon, Republic of Korea"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","doi-asserted-by":"publisher","DOI":"10.1145\/3665026.3665029"},{"key":"e_1_3_2_2_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.572"},{"key":"e_1_3_2_2_3_1","volume-title":"Proceedings of the Acl Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization. 65-72","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the Acl Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization. 65-72."},{"key":"e_1_3_2_2_4_1","doi-asserted-by":"publisher","DOI":"10.1145\/3633516"},{"key":"e_1_3_2_2_5_1","volume-title":"International Conference on Machine Learning. PMLR, 1597-1607","author":"Chen Ting","year":"2020","unstructured":"Ting Chen, Simon Kornblith, Mohammad Norouzi, and Geoffrey Hinton. 2020. A simple framework for contrastive learning of visual representations. In International Conference on Machine Learning. PMLR, 1597-1607."},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01314"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1145\/3347318.3355525"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3689061.3689074"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02193"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00506"},{"key":"e_1_3_2_2_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"e_1_3_2_2_13_1","volume-title":"Multimodal pretraining for dense video captioning. arXiv preprint arXiv:2011.11760","author":"Huang Gabriel","year":"2020","unstructured":"Gabriel Huang, Bo Pang, Zhenhai Zhu, Clara Rivera, and Radu Soricut. 2020. Multimodal pretraining for dense video captioning. arXiv preprint arXiv:2011.11760 (2020)."},{"key":"e_1_3_2_2_14_1","volume-title":"Proceedings of the IEEE\/CVF conference on Computer Vision and Pattern Recognition Workshops. 958-959","author":"Iashin Vladimir","year":"2020","unstructured":"Vladimir Iashin and Esa Rahtu. 2020. Multi-modal dense video captioning. In Proceedings of the IEEE\/CVF conference on Computer Vision and Pattern Recognition Workshops. 958-959."},{"key":"e_1_3_2_2_15_1","unstructured":"Kazuya Kawakami. 2008. Supervised sequence labelling with recurrent neural networks. Ph.D. Dissertation. Ph. D. thesis."},{"key":"e_1_3_2_2_16_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.83"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1114"},{"key":"e_1_3_2_2_19_1","volume-title":"International Conference on Machine Learning. PMLR","author":"Li Junnan","year":"2023","unstructured":"Junnan Li, Dongxu Li, Silvio Savarese, and Steven Hoi. 2023. Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models. In International Conference on Machine Learning. PMLR, 19730-19742."},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02095"},{"key":"e_1_3_2_2_21_1","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin Chin-Yew","year":"2004","unstructured":"Chin-Yew Lin. 2004. Rouge: A package for automatic evaluation of summaries. In Text Summarization Branches Out. 74-81.","journal-title":"Text Summarization Branches Out."},{"key":"e_1_3_2_2_22_1","volume-title":"Sgdr: Stochastic gradient descent with warm restarts. arXiv preprint arXiv:1608.03983","author":"Loshchilov Ilya","year":"2016","unstructured":"Ilya Loshchilov and Frank Hutter. 2016. Sgdr: Stochastic gradient descent with warm restarts. arXiv preprint arXiv:1608.03983 (2016)."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-14415-z"},{"key":"e_1_3_2_2_24_1","volume-title":"Learnable pooling with context gating for video classification. arXiv preprint arXiv:1706.06905","author":"Miech Antoine","year":"2017","unstructured":"Antoine Miech, Ivan Laptev, and Josef Sivic. 2017. Learnable pooling with context gating for video classification. arXiv preprint arXiv:1706.06905 (2017)."},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00536"},{"key":"e_1_3_2_2_26_1","volume-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. 311-318","author":"Papineni Kishore","year":"2002","unstructured":"Kishore Papineni, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002. Bleu: a method for automatic evaluation of machine translation. In Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. 311-318."},{"key":"e_1_3_2_2_27_1","volume-title":"International Conference on Machine Learning. PMLR, 28492-28518","author":"Radford Alec","year":"2023","unstructured":"Alec Radford, Jong Wook Kim, Tao Xu, Greg Brockman, Christine McLeavey, and Ilya Sutskever. 2023. Robust speech recognition via large-scale weak supervision. In International Conference on Machine Learning. PMLR, 28492-28518."},{"key":"e_1_3_2_2_28_1","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume":"21","author":"Raffel Colin","year":"2020","unstructured":"Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, and Peter J Liu. 2020. Exploring the limits of transfer learning with a unified text-to-text transformer. Journal of Machine Learning Research, Vol. 21, 140 (2020), 1-67.","journal-title":"Journal of Machine Learning Research"},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00340"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1145\/3689061.3689069"},{"key":"e_1_3_2_2_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICDAR.2007.4376991"},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1145\/3689061.3689077"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW53098.2021.00513"},{"key":"e_1_3_2_2_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00751"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.3014606"},{"key":"e_1_3_2_2_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01032"},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681178"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12342"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01727"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 8th International ACM Workshop on Multimedia Content Analysis in Sports"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3728423.3759412","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T14:30:38Z","timestamp":1773153038000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3728423.3759412"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":40,"alternative-id":["10.1145\/3728423.3759412","10.1145\/3728423"],"URL":"https:\/\/doi.org\/10.1145\/3728423.3759412","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}