{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T13:01:34Z","timestamp":1781182894948,"version":"3.54.1"},"reference-count":59,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,4,27]],"date-time":"2026-04-27T00:00:00Z","timestamp":1777248000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Vision and Image Understanding"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.cviu.2026.104790","type":"journal-article","created":{"date-parts":[[2026,4,30]],"date-time":"2026-04-30T20:26:11Z","timestamp":1777580771000},"page":"104790","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["FOOTPASS: A multi-modal multi-agent tactical context dataset for play-by-play action spotting in soccer broadcast videos"],"prefix":"10.1016","volume":"269","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3909-3757","authenticated-orcid":false,"given":"J\u00e9r\u00e9mie","family":"Ochin","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rapha\u00ebl","family":"Chekroun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bogdan","family":"Stanciulescu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Sotiris","family":"Manitsaris","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.cviu.2026.104790_b1","series-title":"Machine Learning and Data Mining for Sports Analytics","first-page":"31","article-title":"Distinguishing between roles of football players in play-by-play match event data","author":"Aalbers","year":"2019"},{"key":"10.1016\/j.cviu.2026.104790_b2","series-title":"Common data format (CDF): A standardized format for match-data in football (soccer)","author":"Anzer","year":"2025"},{"issue":"1","key":"10.1016\/j.cviu.2026.104790_b3","doi-asserted-by":"crossref","first-page":"195","DOI":"10.1038\/s41597-025-04505-y","article-title":"An integrated dataset of spatiotemporal and event data in elite soccer","volume":"12","author":"Bassek","year":"2025","journal-title":"Sci. Data"},{"key":"10.1016\/j.cviu.2026.104790_b4","series-title":"2014 IEEE International Conference on Data Mining","first-page":"725","article-title":"Large-scale analysis of soccer matches using spatiotemporal tracking data","author":"Bialkowski","year":"2014"},{"issue":"2","key":"10.1016\/j.cviu.2026.104790_b5","doi-asserted-by":"crossref","DOI":"10.3390\/info11020125","article-title":"Albumentations: Fast and flexible image augmentations","volume":"11","author":"Buslaev","year":"2020","journal-title":"Information"},{"key":"10.1016\/j.cviu.2026.104790_b6","series-title":"IEEE International Conference on Image Processing","first-page":"2313","article-title":"Footbots: A transformer-based architecture for motion prediction in soccer","author":"Capellera","year":"2024"},{"key":"10.1016\/j.cviu.2026.104790_b7","series-title":"Proceedings of the 5th International ACM Workshop on Multimedia Content Analysis in Sports","first-page":"93","article-title":"A graph-based method for soccer action spotting using unsupervised player classification","author":"Cartas","year":"2022"},{"key":"10.1016\/j.cviu.2026.104790_b8","series-title":"Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing","first-page":"1724","article-title":"Learning phrase representations using RNN encoder\u2013decoder for statistical machine translation","author":"Cho","year":"2014"},{"issue":"1","key":"10.1016\/j.cviu.2026.104790_b9","doi-asserted-by":"crossref","first-page":"355","DOI":"10.1038\/s41597-022-01469-1","article-title":"Scaling up SoccerNet with multi-view spatial localization and re-identification","volume":"9","author":"Cioppa","year":"2022","journal-title":"Sci. Data"},{"key":"10.1016\/j.cviu.2026.104790_b10","series-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","first-page":"3490","article-title":"SoccerNet-tracking: Multiple object tracking dataset and benchmark in soccer videos","author":"Cioppa","year":"2022"},{"key":"10.1016\/j.cviu.2026.104790_b11","series-title":"SoccerNet 2024 challenges results","author":"Cioppa","year":"2024"},{"issue":"2","key":"10.1016\/j.cviu.2026.104790_b12","doi-asserted-by":"crossref","first-page":"24","DOI":"10.1007\/s12283-024-00466-4","article-title":"SoccerNet 2023 challenges results","volume":"27","author":"Cioppa","year":"2024","journal-title":"Sport. Eng."},{"issue":"39","key":"10.1016\/j.cviu.2026.104790_b13","doi-asserted-by":"crossref","first-page":"29685","DOI":"10.1007\/s11042-020-09409-0","article-title":"Techniques and applications for soccer video analysis: A survey","volume":"79","author":"Cuevas","year":"2020","journal-title":"Multimedia Tools Appl."},{"issue":"4","key":"10.1016\/j.cviu.2026.104790_b14","doi-asserted-by":"crossref","first-page":"2157","DOI":"10.1007\/s00180-025-01604-7","article-title":"Lasso multinomial performance indicators for in-play basketball data: Lasso multinomial performance indicators for in-play...","volume":"40","author":"Damoulaki","year":"2025","journal-title":"Comput. Statist."},{"issue":"9","key":"10.1016\/j.cviu.2026.104790_b15","doi-asserted-by":"crossref","first-page":"6977","DOI":"10.1007\/s10994-024-06585-0","article-title":"Methodology and evaluation in sports analytics: challenges, approaches, and lessons learned","volume":"113","author":"Davis","year":"2024","journal-title":"Mach. Learn."},{"key":"10.1016\/j.cviu.2026.104790_b16","series-title":"2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","first-page":"4503","article-title":"SoccerNet-v2: A dataset and benchmarks for holistic understanding of broadcast soccer videos","author":"Deli\u00e8ge","year":"2021"},{"issue":"2","key":"10.1016\/j.cviu.2026.104790_b17","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","article-title":"The pascal visual object classes (VOC) challenge","volume":"88","author":"Everingham","year":"2010","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.cviu.2026.104790_b18","doi-asserted-by":"crossref","unstructured":"Feichtenhofer,\u00a0C., 2020. X3d: Expanding architectures for efficient video recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. pp. 203\u2013213.","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"10.1016\/j.cviu.2026.104790_b19","series-title":"Computer Vision \u2013 ECCV 2018","first-page":"761","article-title":"Where will they go? Predicting fine-grained adversarial multi-agent motion using conditional variational autoencoders","author":"Felsen","year":"2018"},{"issue":"39\u201340","key":"10.1016\/j.cviu.2026.104790_b20","doi-asserted-by":"crossref","first-page":"28971","DOI":"10.1007\/s11042-020-09414-3","article-title":"SSET: a dataset for shot segmentation, event detection, player tracking in soccer videos","volume":"79","author":"Feng","year":"2020","journal-title":"Multimedia Tools Appl."},{"key":"10.1016\/j.cviu.2026.104790_b21","series-title":"Electronic performance and tracking systems (EPTS)","author":"FIFA","year":"2026"},{"issue":"6","key":"10.1016\/j.cviu.2026.104790_b22","doi-asserted-by":"crossref","first-page":"1567","DOI":"10.1177\/17479541221075734","article-title":"The use of player tracking data to analyze defensive play in professional soccer - A scoping review","volume":"17","author":"Forcher","year":"2022","journal-title":"Int. J. Sport. Sci. Coach."},{"key":"10.1016\/j.cviu.2026.104790_b23","series-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops","first-page":"1792","article-title":"SoccerNet: A Scalable Dataset for Action Spotting in Soccer Videos","author":"Giancola","year":"2018"},{"key":"10.1016\/j.cviu.2026.104790_b24","series-title":"Proceedings of the 5th International ACM Workshop on Multimedia Content Analysis in Sports","first-page":"75","article-title":"SoccerNet 2022 challenges results","author":"Giancola","year":"2022"},{"key":"10.1016\/j.cviu.2026.104790_b25","series-title":"SoccerNet 2025 challenges results","author":"Giancola","year":"2025"},{"key":"10.1016\/j.cviu.2026.104790_b26","doi-asserted-by":"crossref","unstructured":"Guti\u00e9rrez-P\u00e9rez,\u00a0M., Agudo,\u00a0A., 2025. SoccerNet-v3D: Leveraging Sports Broadcast Replays for 3D Scene Understanding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) Workshops. pp. 5977\u20135986.","DOI":"10.1109\/CVPRW67362.2025.00595"},{"key":"10.1016\/j.cviu.2026.104790_b27","series-title":"Multiple View Geometry in Computer Vision","author":"Hartley","year":"2004"},{"key":"10.1016\/j.cviu.2026.104790_b28","series-title":"2017 IEEE International Conference on Computer Vision","first-page":"2980","article-title":"Mask R-CNN","author":"He","year":"2017"},{"key":"10.1016\/j.cviu.2026.104790_b29","series-title":"Computer Vision \u2013 ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXXV","first-page":"33","article-title":"Spotting temporally precise, fine-grained events in video","author":"Hong","year":"2022"},{"key":"10.1016\/j.cviu.2026.104790_b30","series-title":"2022 IEEE 12th Sensor Array and Multichannel Signal Processing Workshop","first-page":"221","article-title":"Video analytics in elite soccer: A distributed computing perspective","author":"Jha","year":"2022"},{"key":"10.1016\/j.cviu.2026.104790_b31","series-title":"Proceedings of the 3rd International Workshop on Multimedia Content Analysis in Sports","first-page":"1","article-title":"Soccerdb: A large-scale database for comprehensive video understanding","author":"Jiang","year":"2020"},{"key":"10.1016\/j.cviu.2026.104790_b32","doi-asserted-by":"crossref","DOI":"10.3389\/fpsyg.2019.01738","article-title":"Play-by-play network analysis in football","volume":"10","author":"Korte","year":"2019","journal-title":"Front. Psychol."},{"key":"10.1016\/j.cviu.2026.104790_b33","series-title":"2021 IEEE\/CVF International Conference on Computer Vision","first-page":"13516","article-title":"MultiSports: A multi-person video dataset of spatio-temporally localized sports actions","author":"Li","year":"2021"},{"issue":"2","key":"10.1016\/j.cviu.2026.104790_b34","doi-asserted-by":"crossref","first-page":"318","DOI":"10.1109\/TPAMI.2018.2858826","article-title":"Focal loss for dense object detection","volume":"42","author":"Lin","year":"2020","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.cviu.2026.104790_b35","series-title":"7th International Conference on Learning Representations","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2019"},{"key":"10.1016\/j.cviu.2026.104790_b36","series-title":"2013 IEEE Conference on Computer Vision and Pattern Recognition","first-page":"2706","article-title":"Representing and discovering adversarial team behaviors using player roles","author":"Lucey","year":"2013"},{"key":"10.1016\/j.cviu.2026.104790_b37","doi-asserted-by":"crossref","DOI":"10.3389\/fspor.2021.676179","article-title":"Space and control in soccer","volume":"3","author":"Martens","year":"2021","journal-title":"Front. Sport. Act. Living"},{"issue":"9","key":"10.1016\/j.cviu.2026.104790_b38","doi-asserted-by":"crossref","first-page":"8687","DOI":"10.1007\/s10994-024-06606-y","article-title":"Towards a foundation large events model for soccer","volume":"113","author":"Mendes-Neves","year":"2024","journal-title":"Mach. Learn."},{"issue":"7","key":"10.1016\/j.cviu.2026.104790_b39","first-page":"3523","article-title":"Image segmentation using deep learning: A survey","volume":"44","author":"Minaee","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.cviu.2026.104790_b40","series-title":"Communications in Computer and Information Science","doi-asserted-by":"crossref","first-page":"119","DOI":"10.1007\/978-3-031-53833-9_10","article-title":"What data should be collected for a good handball Expected Goal model?","volume":"vol. 2035","author":"Mortelier","year":"2023"},{"key":"10.1016\/j.cviu.2026.104790_b41","series-title":"Sports Analytics","first-page":"149","article-title":"Design of a handball tactics observatory based on dynamic sub-graphs","author":"Mortelier","year":"2024"},{"key":"10.1016\/j.cviu.2026.104790_b42","series-title":"Advanced Concepts for Intelligent Vision Systems","first-page":"552","article-title":"Beyond pixels: Leveraging the language of soccer to improve spatio-temporal action detection in broadcast videos","author":"Ochin","year":"2026"},{"key":"10.1016\/j.cviu.2026.104790_b43","series-title":"Proceedings of the 14th International Conference on Pattern Recognition Applications and Methods - Volume 1: ICPRAM","first-page":"636","article-title":"Game state and spatio-temporal action detection in soccer using graph neural networks and 3D convolutional networks","author":"Ochin","year":"2025"},{"key":"10.1016\/j.cviu.2026.104790_b44","series-title":"Space evaluation at the starting point of soccer transitions","author":"Ogawa","year":"2025"},{"issue":"4","key":"10.1016\/j.cviu.2026.104790_b45","first-page":"535","article-title":"Predicting play calls in the National Football League using hidden Markov models","volume":"32","author":"\u00d6tting","year":"2021","journal-title":"IMA J. Manag. Math."},{"key":"10.1016\/j.cviu.2026.104790_b46","series-title":"Proceedings of the 20th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications","first-page":"221","article-title":"Temporally accurate events detection through ball possessor recognition in soccer","author":"Peral","year":"2025"},{"issue":"4","key":"10.1016\/j.cviu.2026.104790_b47","doi-asserted-by":"crossref","first-page":"3783","DOI":"10.1007\/s10489-022-03631-z","article-title":"Graph representations for the analysis of multi-agent spatiotemporal sports data","volume":"53","author":"Raabe","year":"2022","journal-title":"Appl. Intell."},{"key":"10.1016\/j.cviu.2026.104790_b48","series-title":"2025 IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"8384","article-title":"Towards universal soccer video understanding","author":"Rao","year":"2025"},{"key":"10.1016\/j.cviu.2026.104790_b49","series-title":"Fine-grained retrieval of sports plays using tree-based alignment of trajectories","author":"Sha","year":"2017"},{"key":"10.1016\/j.cviu.2026.104790_b50","series-title":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","first-page":"3898","article-title":"Seq2Event: Learning the language of soccer using transformer-based match event prediction","author":"Simpson","year":"2022"},{"key":"10.1016\/j.cviu.2026.104790_b51","doi-asserted-by":"crossref","unstructured":"Singh,\u00a0G., Choutas,\u00a0V., Saha,\u00a0S., Yu,\u00a0F., Van\u00a0Gool,\u00a0L., 2023. Spatio-Temporal Action Detection Under Large Motion. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision. WACV, pp. 6009\u20136018.","DOI":"10.1109\/WACV56688.2023.00595"},{"key":"10.1016\/j.cviu.2026.104790_b52","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2024.108458","article-title":"The evolution of object detection methods","volume":"133","author":"Sun","year":"2024","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.cviu.2026.104790_b53","series-title":"MVP-Shapley: Feature-based modeling for evaluating the most valuable player in basketball","author":"Sun","year":"2025"},{"issue":"C","key":"10.1016\/j.cviu.2026.104790_b54","doi-asserted-by":"crossref","first-page":"58","DOI":"10.1016\/j.eswa.2015.09.004","article-title":"Modeling basketball play-by-play data","volume":"44","author":"Vra\u010dar","year":"2016","journal-title":"Expert Syst. Appl."},{"issue":"5","key":"10.1016\/j.cviu.2026.104790_b55","doi-asserted-by":"crossref","DOI":"10.1145\/3326362","article-title":"Dynamic graph CNN for learning on point clouds","volume":"38","author":"Wang","year":"2019","journal-title":"ACM Trans. Graph."},{"issue":"1","key":"10.1016\/j.cviu.2026.104790_b56","doi-asserted-by":"crossref","first-page":"1906","DOI":"10.1038\/s41467-024-45965-x","article-title":"TacticAI: an AI assistant for football tactics","volume":"15","author":"Wang","year":"2024","journal-title":"Nat. Commun."},{"key":"10.1016\/j.cviu.2026.104790_b57","series-title":"A survey on deep learning-based spatio-temporal action detection","author":"Wang","year":"2023"},{"issue":"5","key":"10.1016\/j.cviu.2026.104790_b58","doi-asserted-by":"crossref","DOI":"10.1007\/s10489-024-05996-9","article-title":"Transformer-based neural marked spatio temporal point process model for analyzing football match events: Transformer-based neural marked spatio temporal point process model...","volume":"55","author":"Yeung","year":"2025","journal-title":"Appl. Intell."},{"key":"10.1016\/j.cviu.2026.104790_b59","series-title":"2018 IEEE Conference on Multimedia Information Processing and Retrieval","first-page":"418","article-title":"Comprehensive dataset of broadcast soccer videos","author":"Yu","year":"2018"}],"container-title":["Computer Vision and Image Understanding"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1077314226001578?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1077314226001578?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T12:28:31Z","timestamp":1781180911000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1077314226001578"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":59,"alternative-id":["S1077314226001578"],"URL":"https:\/\/doi.org\/10.1016\/j.cviu.2026.104790","relation":{},"ISSN":["1077-3142"],"issn-type":[{"value":"1077-3142","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"FOOTPASS: A multi-modal multi-agent tactical context dataset for play-by-play action spotting in soccer broadcast videos","name":"articletitle","label":"Article Title"},{"value":"Computer Vision and Image Understanding","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.cviu.2026.104790","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Author(s). Published by Elsevier Inc.","name":"copyright","label":"Copyright"}],"article-number":"104790"}}