{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T11:37:36Z","timestamp":1773229056398,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":32,"publisher":"ACM","funder":[{"name":"JST, ACT-X","award":["JPMJAX24CH,"],"award-info":[{"award-number":["JPMJAX24CH,"]}]},{"name":"JSPS KAKENHI","award":["JP24K23889"],"award-info":[{"award-number":["JP24K23889"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3728423.3759408","type":"proceedings-article","created":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T15:32:07Z","timestamp":1759937527000},"page":"105-113","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["Shot2Tactic-Caption: Multi-Scale Captioning of Badminton Videos for Tactical Understanding"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3067-7341","authenticated-orcid":false,"given":"Ning","family":"Ding","sequence":"first","affiliation":[{"name":"Nagoya Institute of Technology, Nagoya, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5487-4297","authenticated-orcid":false,"given":"Keisuke","family":"Fujii","sequence":"additional","affiliation":[{"name":"Nagoya University, Nagoya, Japan"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9712-7777","authenticated-orcid":false,"given":"Toru","family":"Tamaki","sequence":"additional","affiliation":[{"name":"Nagoya Institute of Technology, Nagoya, Japan"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"Badminton World Federation. 2024. BWF Official YouTube Channel. https: \/\/www.youtube.com\/@bwftv. Accessed: 2024-08-01."},{"key":"e_1_3_2_2_2_1","volume-title":"arXiv preprint arXiv:2502.13923","author":"Bai Shuai","year":"2025","unstructured":"Shuai Bai, Keqin Chen, Xuejing Liu, Jialin Wang, Wenbin Ge, Sibo Song, Kai Dang, PengWang, ShijieWang, Jun Tang, Humen Zhong, Yuanzhi Zhu, Mingkun Yang, Zhaohai Li, Jianqiang Wan, Pengfei Wang, Wei Ding, Zheren Fu, Yiheng Xu, Jiabo Ye, Xi Zhang, Tianbao Xie, Zesen Cheng, Hang Zhang, Zhibo Yang, Haiyang Xu, and Junyang Lin. 2025. Qwen2.5-VL Technical Report. arXiv preprint arXiv:2502.13923 (2025)."},{"key":"e_1_3_2_2_3_1","volume-title":"Proceedings of the 5th International ACM Workshop on Multimedia Content Analysis in Sports. 47--54","author":"Abdullah Junaidi","year":"2022","unstructured":"Kar-Weng Ban, John See, Junaidi Abdullah, and Yuen Peng Loh. 2022. Badmintondb: A badminton dataset for player-specific match analysis and prediction. In Proceedings of the 5th International ACM Workshop on Multimedia Content Analysis in Sports. 47--54."},{"key":"e_1_3_2_2_4_1","volume-title":"Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65--72","author":"Banerjee Satanjeev","year":"2005","unstructured":"Satanjeev Banerjee and Alon Lavie. 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In Proceedings of the acl workshop on intrinsic and extrinsic evaluation measures for machine translation and\/or summarization. 65--72."},{"key":"e_1_3_2_2_5_1","volume-title":"VideoLLaMA 2: Advancing Spatial-Temporal Modeling and Audio Understanding in Video-LLMs. arXiv preprint arXiv:2406.07476","author":"Cheng Zesen","year":"2024","unstructured":"Zesen Cheng, Sicong Leng, Hang Zhang, Yifei Xin, Xin Li, Guanzheng Chen, Yongxin Zhu, Wenqi Zhang, Ziyang Luo, Deli Zhao, and Lidong Bing. 2024. VideoLLaMA 2: Advancing Spatial-Temporal Modeling and Audio Understanding in Video-LLMs. arXiv preprint arXiv:2406.07476 (2024). https:\/\/arxiv.org\/abs\/ 2406.07476"},{"key":"e_1_3_2_2_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"e_1_3_2_2_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3175314"},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1007\/s11042-023-16362-1"},{"key":"e_1_3_2_2_9_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.procs.2022.10.125"},{"key":"e_1_3_2_2_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_2_2_11_1","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops. 958--959","author":"Iashin Vladimir","year":"2020","unstructured":"Vladimir Iashin and Esa Rahtu. 2020. Multi-modal dense video captioning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops. 958--959."},{"key":"e_1_3_2_2_12_1","unstructured":"Will Kay Joao Carreira Karen Simonyan Brian Zhang Chloe Hillier Sudheendra Vijayanarasimhan Fabio Viola Tim Green Trevor Back Paul Natsev et al. 2017. The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)."},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.83"},{"key":"e_1_3_2_2_14_1","volume-title":"Sports-QA: A Large-Scale Video Question Answering Benchmark for Complex and Professional Sports. arXiv preprint arXiv:2401.01505","author":"Li Haopeng","year":"2024","unstructured":"Haopeng Li, Andong Deng, Qiuhong Ke, Jun Liu, Hossein Rahmani, Yulan Guo, Bernt Schiele, and Chen Chen. 2024. Sports-QA: A Large-Scale Video Question Answering Benchmark for Complex and Professional Sports. arXiv preprint arXiv:2401.01505 (2024)."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"e_1_3_2_2_16_1","volume-title":"Proceedings, Part IV 16","author":"Liu Bin","year":"2020","unstructured":"Bin Liu, Yue Cao, Yutong Lin, Qi Li, Zheng Zhang, Mingsheng Long, and Han Hu. 2020. Negative margin matters: Understanding margin in few-shot classification. In Computer Vision--ECCV 2020: 16th European Conference, Glasgow, UK, August 23--28, 2020, Proceedings, Part IV 16. Springer, 438--455."},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00536"},{"key":"e_1_3_2_2_18_1","doi-asserted-by":"publisher","DOI":"10.3115\/1073083.1073135"},{"key":"e_1_3_2_2_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2000.902885"},{"key":"e_1_3_2_2_20_1","doi-asserted-by":"crossref","unstructured":"Jon Postel. 1981. Transmission control protocol. Technical Report.","DOI":"10.17487\/rfc0793"},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1145\/3583780.3615120"},{"key":"e_1_3_2_2_22_1","doi-asserted-by":"publisher","DOI":"10.1038\/s41597-024-03144-z"},{"key":"e_1_3_2_2_23_1","volume-title":"Going for GOAL: A resource for grounded football commentaries. arXiv preprint arXiv:2211.04534","author":"Suglia Alessandro","year":"2022","unstructured":"Alessandro Suglia, Jos\u00e9 Lopes, Emanuele Bastianelli, Andrea Vanzo, Shubham Agarwal, Malvina Nikandrou, Lu Yu, Ioannis Konstas, and Verena Rieser. 2022. Going for GOAL: A resource for grounded football commentaries. arXiv preprint arXiv:2211.04534 (2022)."},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPAI51961.2020.00023"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"e_1_3_2_2_26_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"e_1_3_2_2_27_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.dib.2024.110665"},{"key":"e_1_3_2_2_28_1","volume-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 3410--3419","author":"Xarles Artur","year":"2024","unstructured":"Artur Xarles, Sergio Escalera, Thomas B Moeslund, and Albert Clap\u00e9s. 2024. TDEED: Temporal-Discriminability Enhancer Encoder-Decoder for Precise Event Spotting in Sports Videos. In Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. 3410--3419."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.127567"},{"key":"e_1_3_2_2_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00629"},{"key":"e_1_3_2_2_31_1","volume-title":"Generalized cross entropy loss for training deep neural networks with noisy labels. Advances in neural information processing systems 31","author":"Zhang Zhilu","year":"2018","unstructured":"Zhilu Zhang and Mert Sabuncu. 2018. Generalized cross entropy loss for training deep neural networks with noisy labels. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01727"}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","location":"Dublin Ireland","acronym":"MM '25","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 8th International ACM Workshop on Multimedia Content Analysis in Sports"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3728423.3759408","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T14:31:58Z","timestamp":1773153118000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3728423.3759408"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":32,"alternative-id":["10.1145\/3728423.3759408","10.1145\/3728423"],"URL":"https:\/\/doi.org\/10.1145\/3728423.3759408","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}