{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,9]],"date-time":"2026-05-09T00:13:26Z","timestamp":1778285606837,"version":"3.51.4"},"reference-count":60,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62236010"],"award-info":[{"award-number":["62236010"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004826","name":"Beijing Natural Science Foundation","doi-asserted-by":"publisher","award":["L233008"],"award-info":[{"award-number":["L233008"]}],"id":[{"id":"10.13039\/501100004826","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Engineering Applications of Artificial Intelligence"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.engappai.2026.114756","type":"journal-article","created":{"date-parts":[[2026,4,8]],"date-time":"2026-04-08T14:51:52Z","timestamp":1775659912000},"page":"114756","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"P1","title":["A novel identity-aware video captioning method with multi-perspective visual information for basketball"],"prefix":"10.1016","volume":"176","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-1279-818X","authenticated-orcid":false,"given":"Haitao","family":"Li","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-4428-7111","authenticated-orcid":false,"given":"Haoran","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3181-6761","authenticated-orcid":false,"given":"Zeyu","family":"Xi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0271-5515","authenticated-orcid":false,"given":"Haoying","family":"Sun","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7209-0215","authenticated-orcid":false,"given":"Lifang","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"78","reference":[{"key":"10.1016\/j.engappai.2026.114756_b1","doi-asserted-by":"crossref","unstructured":"Ayyubi, H., Liu, T., Nagrani, A., Lin, X., Zhang, M., Arnab, A., Han, F., Zhu, Y., Feng, X., Zhang, K., et al., 2024. VIEWS: Entity-Aware News Video Captioning. In: Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing. pp. 20220\u201320239.","DOI":"10.18653\/v1\/2024.emnlp-main.1128"},{"key":"10.1016\/j.engappai.2026.114756_b2","unstructured":"Banerjee, S., Lavie, A., 2005. METEOR: An automatic metric for MT evaluation with improved correlation with human judgments. In: Proceedings of the Acl Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/Or Summarization. pp. 65\u201372."},{"key":"10.1016\/j.engappai.2026.114756_b3","first-page":"4","article-title":"Is space-time attention all you need for video understanding?","volume":"vol. 2","author":"Bertasius","year":"2021"},{"key":"10.1016\/j.engappai.2026.114756_b4","article-title":"ADP: Graph adaptive pooling based on edge understanding with graph pooling information bottleneck","author":"Cao","year":"2025","journal-title":"IEEE Trans. Consum. Electron."},{"key":"10.1016\/j.engappai.2026.114756_b5","series-title":"Expanding performance boundaries of open-source multimodal models with model, data, and test-time scaling","author":"Chen","year":"2024"},{"key":"10.1016\/j.engappai.2026.114756_b6","series-title":"Learning phrase representations using RNN encoder-decoder for statistical machine translation","author":"Cho","year":"2014"},{"key":"10.1016\/j.engappai.2026.114756_b7","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.112219","article-title":"LLM-Commentator: Novel fine-tuning strategies of large language models for automatic commentary generation using football event data","volume":"300","author":"Cook","year":"2024","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.engappai.2026.114756_b8","series-title":"SynPO: Synergizing descriptiveness and preference optimization for video detailed captioning","author":"Dang","year":"2025"},{"key":"10.1016\/j.engappai.2026.114756_b9","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2025.113856","article-title":"Multimodal representation fusion method for dense video captioning","author":"Fang","year":"2025","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.engappai.2026.114756_b10","doi-asserted-by":"crossref","first-page":"424","DOI":"10.1016\/j.inffus.2022.09.025","article-title":"Multimodal sentiment analysis: A systematic review of history, datasets, multimodal fusion methods, applications, challenges and future directions","volume":"91","author":"Gandhi","year":"2023","journal-title":"Inf. Fusion"},{"key":"10.1016\/j.engappai.2026.114756_b11","series-title":"The llama 3 herd of models","author":"Grattafiori","year":"2024"},{"key":"10.1016\/j.engappai.2026.114756_b12","doi-asserted-by":"crossref","unstructured":"Gu, X., Chen, G., Wang, Y., Zhang, L., Luo, T., Wen, L., 2023. Text with knowledge graph augmented transformer for video captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 18941\u201318951.","DOI":"10.1109\/CVPR52729.2023.01816"},{"key":"10.1016\/j.engappai.2026.114756_b13","doi-asserted-by":"crossref","unstructured":"Guadarrama, S., Krishnamoorthy, N., Malkarnenkar, G., Venugopalan, S., Mooney, R., Darrell, T., Saenko, K., 2013. Youtube2text: Recognizing and describing arbitrary activities using semantic hierarchies and zero-shot recognition. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 2712\u20132719.","DOI":"10.1109\/ICCV.2013.337"},{"key":"10.1016\/j.engappai.2026.114756_b14","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R., 2017. Mask r-cnn. In: Proceedings of the IEEE International Conference on Computer Vision. pp. 2961\u20132969.","DOI":"10.1109\/ICCV.2017.322"},{"key":"10.1016\/j.engappai.2026.114756_b15","series-title":"Storyteller: Improving long video description through global audio-visual character identification","author":"He","year":"2024"},{"key":"10.1016\/j.engappai.2026.114756_b16","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.110773","article-title":"Lightweight recurrent cross-modal encoder for video question answering","volume":"276","author":"Immanuel","year":"2023","journal-title":"Knowl.-Based Syst."},{"issue":"3","key":"10.1016\/j.engappai.2026.114756_b17","doi-asserted-by":"crossref","first-page":"1543","DOI":"10.3390\/app15031543","article-title":"Integrated AI system for real-time sports broadcasting: Player behavior, game event recognition, and generative AI commentary in basketball games","volume":"15","author":"Jung","year":"2025","journal-title":"Appl. Sci."},{"key":"10.1016\/j.engappai.2026.114756_b18","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.111147","article-title":"MDM: Meta diffusion model for hard-constrained text generation","volume":"283","author":"Ke","year":"2024","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.engappai.2026.114756_b19","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1023\/A:1020346032608","article-title":"Natural language description of human activities from video images based on concept hierarchy of actions","volume":"50","author":"Kojima","year":"2002","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.engappai.2026.114756_b20","first-page":"541","article-title":"Generating natural-language video descriptions using text-mined knowledge","volume":"vol. 27","author":"Krishnamoorthy","year":"2013"},{"key":"10.1016\/j.engappai.2026.114756_b21","series-title":"Text Summarization Branches Out","first-page":"74","article-title":"Rouge: A package for automatic evaluation of summaries","author":"Lin","year":"2004"},{"key":"10.1016\/j.engappai.2026.114756_b22","doi-asserted-by":"crossref","unstructured":"Lin, K., Li, L., Lin, C.C., Ahmed, F., Gan, Z., Liu, Z., Lu, Y., Wang, L., 2022. Swinbert: End-to-end transformers with sparse attention for video captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 17949\u201317958.","DOI":"10.1109\/CVPR52688.2022.01742"},{"issue":"4","key":"10.1016\/j.engappai.2026.114756_b23","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3715142","article-title":"Mix-modality person re-identification: A new and practical paradigm","volume":"21","author":"Liu","year":"2025","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl."},{"key":"10.1016\/j.engappai.2026.114756_b24","series-title":"Mediapipe: A framework for building perception pipelines","author":"Lugaresi","year":"2019"},{"key":"10.1016\/j.engappai.2026.114756_b25","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.112258","article-title":"Style-aware two-stage learning framework for video captioning","volume":"301","author":"Ma","year":"2024","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.engappai.2026.114756_b26","doi-asserted-by":"crossref","unstructured":"Nakamura, K., Ohashi, H., Okada, M., 2021. Sensor-augmented egocentric-video captioning with dynamic modal attention. In: Proceedings of the 29th ACM International Conference on Multimedia. pp. 4220\u20134229.","DOI":"10.1145\/3474085.3475557"},{"key":"10.1016\/j.engappai.2026.114756_b27","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J., 2002. Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics. pp. 311\u2013318.","DOI":"10.3115\/1073083.1073135"},{"key":"10.1016\/j.engappai.2026.114756_b28","doi-asserted-by":"crossref","unstructured":"Qi, J., Yu, J., Tu, T., Gao, K., Xu, Y., Guan, X., Wang, X., Xu, B., Hou, L., Li, J., et al., 2023. GOAL: A challenging knowledge-grounded video captioning benchmark for real-time soccer commentary generation. In: Proceedings of the 32nd ACM International Conference on Information and Knowledge Management. pp. 5391\u20135395.","DOI":"10.1145\/3583780.3615120"},{"key":"10.1016\/j.engappai.2026.114756_b29","doi-asserted-by":"crossref","unstructured":"Raajesh, H., Desanur, N.R., Khan, Z., Tapaswi, M., 2024. Micap: A unified model for identity-aware movie descriptions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 14011\u201314021.","DOI":"10.1109\/CVPR52733.2024.01329"},{"key":"10.1016\/j.engappai.2026.114756_b30","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"issue":"8","key":"10.1016\/j.engappai.2026.114756_b31","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"10.1016\/j.engappai.2026.114756_b32","doi-asserted-by":"crossref","unstructured":"Rao, J., Wu, H., Jiang, H., Zhang, Y., Wang, Y., Xie, W., 2025. Towards universal soccer video understanding. In: Proceedings of the Computer Vision and Pattern Recognition Conference. pp. 8384\u20138394.","DOI":"10.1109\/CVPR52734.2025.00785"},{"key":"10.1016\/j.engappai.2026.114756_b33","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.111056","article-title":"Parallel encoder\u2013decoder framework for image captioning","volume":"282","author":"Saeidimesineh","year":"2023","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.engappai.2026.114756_b34","first-page":"152","article-title":"Expert comment generation from sports videos using multimodal LLM","volume":"vol. 13510","author":"Seino","year":"2025"},{"key":"10.1016\/j.engappai.2026.114756_b35","doi-asserted-by":"crossref","unstructured":"Shen, Y., Gu, X., Xu, K., Fan, H., Wen, L., Zhang, L., 2023. Accurate and fast compressed video captioning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 15558\u201315567.","DOI":"10.1109\/ICCV51070.2023.01426"},{"key":"10.1016\/j.engappai.2026.114756_b36","doi-asserted-by":"crossref","first-page":"347","DOI":"10.1016\/j.neucom.2020.08.035","article-title":"Video captioning with boundary-aware hierarchical language decoding and joint video prediction","volume":"417","author":"Shi","year":"2020","journal-title":"Neurocomputing"},{"key":"10.1016\/j.engappai.2026.114756_b37","doi-asserted-by":"crossref","first-page":"1122","DOI":"10.1109\/TIP.2024.3359045","article-title":"Emotional video captioning with vision-based emotion interpretation network","volume":"33","author":"Song","year":"2024","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.engappai.2026.114756_b38","doi-asserted-by":"crossref","unstructured":"Sun, H., Li, S., Xi, Z., Zhang, B., Wu, L., 2025. DSSM-KG: Dual-Stream State-Space Modeling with Adaptive Knowledge Injection for Video Captioning. In: Proceedings of the 2025 International Conference on Multimedia Retrieval. pp. 2038\u20132042.","DOI":"10.1145\/3731715.3733474"},{"key":"10.1016\/j.engappai.2026.114756_b39","doi-asserted-by":"crossref","unstructured":"Tang, M., Wang, Z., Liu, Z., Rao, F., Li, D., Li, X., 2021. Clip4caption: Clip for video caption. In: Proceedings of the 29th ACM International Conference on Multimedia. pp. 4858\u20134862.","DOI":"10.1145\/3474085.3479207"},{"key":"10.1016\/j.engappai.2026.114756_b40","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.engappai.2026.114756_b41","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Lawrence Zitnick, C., Parikh, D., 2015. Cider: Consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. pp. 4566\u20134575.","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"10.1016\/j.engappai.2026.114756_b42","doi-asserted-by":"crossref","unstructured":"Wang, J., Chen, D., Luo, C., He, B., Yuan, L., Wu, Z., Jiang, Y.G., 2024. Omnivid: A generative framework for universal video understanding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 18209\u201318220.","DOI":"10.1109\/CVPR52733.2024.01724"},{"key":"10.1016\/j.engappai.2026.114756_b43","first-page":"8965","article-title":"Learning to compose topic-aware mixture of experts for zero-shot video captioning","volume":"vol. 33","author":"Wang","year":"2019"},{"issue":"9","key":"10.1016\/j.engappai.2026.114756_b44","doi-asserted-by":"crossref","first-page":"4484","DOI":"10.1109\/TCSVT.2023.3277827","article-title":"Concept parser with multimodal graph learning for video captioning","volume":"33","author":"Wu","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.engappai.2026.114756_b45","series-title":"European Conference on Computer Vision","first-page":"19","article-title":"Sports video analysis on large-scale data","author":"Wu","year":"2022"},{"key":"10.1016\/j.engappai.2026.114756_b46","doi-asserted-by":"crossref","DOI":"10.1016\/j.neucom.2024.129177","article-title":"A simple yet effective knowledge guided method for entity-aware video captioning on a basketball benchmark","volume":"619","author":"Xi","year":"2025","journal-title":"Neurocomputing"},{"key":"10.1016\/j.engappai.2026.114756_b47","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.126906","article-title":"EIKA: Explicit & implicit knowledge-augmented network for entity-aware sports video captioning","volume":"274","author":"Xi","year":"2025","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.engappai.2026.114756_b48","doi-asserted-by":"crossref","unstructured":"Xi, Z., Sun, H., Wu, Y., Yan, J., Zhang, H., Wu, L., Wang, L., Chen, C., 2025c. Player-Centric Multimodal Prompt Generation for Large Language Model Based Identity-Aware Basketball Video Captioning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision. pp. 24330\u201324339.","DOI":"10.1109\/ICCV51701.2025.02255"},{"key":"10.1016\/j.engappai.2026.114756_b49","series-title":"2020 IEEE International Conference on Multimedia and Expo","first-page":"1","article-title":"Video captioning with temporal and region graph convolution network","author":"Xiao","year":"2020"},{"key":"10.1016\/j.engappai.2026.114756_b50","series-title":"Chinese Conference on Pattern Recognition and Computer Vision","first-page":"368","article-title":"Clip meets video captioning: Concept-aware representation learning does matter","author":"Yang","year":"2022"},{"key":"10.1016\/j.engappai.2026.114756_b51","doi-asserted-by":"crossref","unstructured":"Ye, C., Chen, W., Song, P., Liu, X., Zhang, L., Mao, Z., 2025. Multi-round Mutual Emotion-Cause Pair Extraction for Emotion-Attributed Video Captioning. In: Proceedings of the 33rd ACM International Conference on Multimedia. pp. 3320\u20133329.","DOI":"10.1145\/3746027.3755048"},{"key":"10.1016\/j.engappai.2026.114756_b52","series-title":"ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"11786","article-title":"Exploring object-centered external knowledge for fine-grained video paragraph captioning","author":"Yu","year":"2024"},{"key":"10.1016\/j.engappai.2026.114756_b53","doi-asserted-by":"crossref","first-page":"5147","DOI":"10.1109\/TMM.2023.3330070","article-title":"GPT-based knowledge guiding network for commonsense video captioning","volume":"26","author":"Yuan","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.engappai.2026.114756_b54","doi-asserted-by":"crossref","unstructured":"Zhang, B., Gao, J., Yuan, Y., 2024. A Descriptive Basketball Highlight Dataset for Automatic Commentary Generation. In: Proceedings of the 32nd ACM International Conference on Multimedia. pp. 10316\u201310325.","DOI":"10.1145\/3664647.3681178"},{"issue":"8","key":"10.1016\/j.engappai.2026.114756_b55","doi-asserted-by":"crossref","first-page":"6735","DOI":"10.1109\/TCSVT.2023.3289142","article-title":"Differential feature awareness network within antagonistic learning for infrared-visible object detection","volume":"34","author":"Zhang","year":"2023","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"10.1016\/j.engappai.2026.114756_b56","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2020.107260","article-title":"Multi-camera multi-player tracking with deep player identification in sports video","volume":"102","author":"Zhang","year":"2020","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.engappai.2026.114756_b57","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1109\/TMM.2021.3070138","article-title":"Deep-IRTarget: An automatic target detector in infrared imagery using dual-domain feature extraction and allocation","volume":"24","author":"Zhang","year":"2021","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.engappai.2026.114756_b58","article-title":"A benchmark and frequency compression method for infrared few-shot object detection","author":"Zhang","year":"2025","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.engappai.2026.114756_b59","doi-asserted-by":"crossref","first-page":"2659","DOI":"10.1109\/TMM.2023.3301279","article-title":"Boosting entity-aware image captioning with multi-modal knowledge graph","volume":"26","author":"Zhao","year":"2023","journal-title":"IEEE Trans. Multimed."},{"key":"10.1016\/j.engappai.2026.114756_b60","article-title":"From multi-scale grids to dynamic regions: Dual-relation enhanced transformer for image captioning","author":"Zhou","year":"2025","journal-title":"Knowl.-Based Syst."}],"container-title":["Engineering Applications of Artificial Intelligence"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626010389?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0952197626010389?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,5,8]],"date-time":"2026-05-08T23:34:58Z","timestamp":1778283298000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0952197626010389"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":60,"alternative-id":["S0952197626010389"],"URL":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114756","relation":{},"ISSN":["0952-1976"],"issn-type":[{"value":"0952-1976","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"A novel identity-aware video captioning method with multi-perspective visual information for basketball","name":"articletitle","label":"Article Title"},{"value":"Engineering Applications of Artificial Intelligence","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.engappai.2026.114756","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"114756"}}