{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T23:40:15Z","timestamp":1780357215815,"version":"3.54.1"},"publisher-location":"Cham","reference-count":19,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032073426","type":"print"},{"value":"9783032073433","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-07343-3_44","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T00:13:07Z","timestamp":1767312787000},"page":"552-563","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Beyond Pixels: Leveraging the\u00a0Language of\u00a0Soccer to\u00a0Improve Spatio-Temporal Action Detection in\u00a0Broadcast Videos"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-3909-3757","authenticated-orcid":false,"given":"Jeremie","family":"Ochin","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6487-2540","authenticated-orcid":false,"given":"Raphael","family":"Chekroun","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Bogdan","family":"Stanciulescu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4552-1793","authenticated-orcid":false,"given":"Sotiris","family":"Manitsaris","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"44_CR1","doi-asserted-by":"publisher","unstructured":"Ochin, J, Devineau, G., Stanciulescu, B., Manitsaris, S.: Game state and spatio-temporal action detection in soccer using graph neural networks and 3D convolutional networks. In: Proceedings of the 14th International Conference on Pattern Recognition Applications and Methods - Volume 1: ICPRAM, pp. 636\u2013646. SciTePress, Porto, Portugal (2025). https:\/\/doi.org\/10.5220\/0013161100003905","DOI":"10.5220\/0013161100003905"},{"issue":"4","key":"44_CR2","doi-asserted-by":"publisher","first-page":"2350066","DOI":"10.1142\/S0219691323500662","volume":"22","author":"P Wang","year":"2024","unstructured":"Wang, P., Zeng, F., Qian, Y.: A survey on deep learning-based spatio-temporal action detection. Int. J. Wavelets Multiresolut. Inf. Process. 22(4), 2350066 (2024). https:\/\/doi.org\/10.1142\/S0219691323500662","journal-title":"Int. J. Wavelets Multiresolut. Inf. Process."},{"key":"44_CR3","doi-asserted-by":"publisher","unstructured":"Li, Y., Chen, L., He, R., Wang, Z., Wu, G., Wang, L: MultiSports: a multi-person video dataset of spatio-temporally localized sports actions. In: Proceedings of 2021 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 13516\u201313525. Montreal, QC, Canada (2021). https:\/\/doi.org\/10.1109\/ICCV48922.2021.01328","DOI":"10.1109\/ICCV48922.2021.01328"},{"key":"44_CR4","doi-asserted-by":"publisher","unstructured":"Singh, G., Choutas, V., Saha, S., Yu, F., Van Gool, L.: Spatio-temporal action detection under large motion. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 5998\u20136007. IEEE, Waikoloa, HI, USA, (2023). https:\/\/doi.org\/10.1109\/WACV56688.2023.00595","DOI":"10.1109\/WACV56688.2023.00595"},{"key":"44_CR5","doi-asserted-by":"publisher","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., Girshick, R.: Mask R-CNN. In: Proceedings of the 2017 IEEE International Conference on Computer Vision (ICCV), pp. 2980\u20132988. Venice, Italy (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.322","DOI":"10.1109\/ICCV.2017.322"},{"key":"44_CR6","doi-asserted-by":"publisher","unstructured":"Pena, J.L.: A Markovian model for association football possession and its outcomes. arXiv (2014). https:\/\/doi.org\/10.48550\/arXiv.1403.7993","DOI":"10.48550\/arXiv.1403.7993"},{"key":"44_CR7","doi-asserted-by":"publisher","unstructured":"Van Roy, M., Robberechts, P., Yang, W., De Raedt, L., Davis, J.: A Markov framework for learning and reasoning about strategies in professional soccer. J. Artif. Intell. Res. 77 (2023). https:\/\/doi.org\/10.1613\/jair.1.13934","DOI":"10.1613\/jair.1.13934"},{"key":"44_CR8","doi-asserted-by":"publisher","unstructured":"Simpson, I., Beal, R.J., Locke, D., Norman, T.J.: Seq2Event: learning the language of soccer using transformer-based match event prediction. In: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 3898\u20133908. Association for Computing Machinery, Washington DC, USA (2022). https:\/\/doi.org\/10.1145\/3534678.3539138","DOI":"10.1145\/3534678.3539138"},{"key":"44_CR9","doi-asserted-by":"publisher","first-page":"8687","DOI":"10.1007\/s10994-024-06606-y","volume":"113","author":"T Mendes-Neves","year":"2024","unstructured":"Mendes-Neves, T., Meireles, L., Mendes-Moreira, J.: Towards a foundation large events model for soccer. Mach. Learn. 113, 8687\u20138709 (2024). https:\/\/doi.org\/10.1007\/s10994-024-06606-y","journal-title":"Mach. Learn."},{"key":"44_CR10","doi-asserted-by":"publisher","unstructured":"Baron, E., Hocevar, D., Salehe, Z.: A Foundation Model for Soccer. arXiv (2024). https:\/\/doi.org\/10.48550\/arXiv.2407.14558","DOI":"10.48550\/arXiv.2407.14558"},{"issue":"10","key":"44_CR11","doi-asserted-by":"publisher","first-page":"1872","DOI":"10.1007\/s11431-020-1647-3","volume":"63","author":"XP Qiu","year":"2020","unstructured":"Qiu, X.P., Sun, T.X., Xu, Y.G., Shao, Y.F., Dai, N., Huang, X.J.: Pre-trained models for natural language processing: a survey. SCIENCE CHINA Technol. Sci. 63(10), 1872\u20131897 (2020). https:\/\/doi.org\/10.1007\/s11431-020-1647-3","journal-title":"SCIENCE CHINA Technol. Sci."},{"key":"44_CR12","doi-asserted-by":"publisher","unstructured":"Lewis, M., et al.: BART: denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pp. 7871\u20137880. Association for Computational Linguistics, Online (2020). https:\/\/doi.org\/10.18653\/v1\/2020.acl-main.703","DOI":"10.18653\/v1\/2020.acl-main.703"},{"key":"44_CR13","doi-asserted-by":"publisher","unstructured":"Saha, S., Singh, G., Sapienza, M., Torr, P.H., Cuzzolin, F.: Deep learning for detecting multiple space-time action tubes in videos. arXiv (2016). https:\/\/doi.org\/10.48550\/arXiv.1608.01529","DOI":"10.48550\/arXiv.1608.01529"},{"key":"44_CR14","doi-asserted-by":"publisher","unstructured":"Singh, G., Saha, S., Sapienza, M., Torr, P., Cuzzolin, F.: Online real-time multiple spatiotemporal action localisation and prediction. In: Proceedings of the 2017 IEEE International Conference on Computer Vision (ICCV), pp. 3657\u20133666. Venice, Italy (2017). https:\/\/doi.org\/10.1109\/ICCV.2017.393","DOI":"10.1109\/ICCV.2017.393"},{"key":"44_CR15","doi-asserted-by":"publisher","unstructured":"Kalogeiton, V., Weinzaepfel, P., Ferrari, V., Schmid, C.: Action tubelet detector for spatio-temporal action localization. In: Proceedings of the 2017 IEEE International Conference on Computer Vision (ICCV), pp. 4405\u20134413. Venice, Italy (2017). https:\/\/doi.org\/10.48550\/arXiv.1705.01861","DOI":"10.48550\/arXiv.1705.01861"},{"key":"44_CR16","doi-asserted-by":"publisher","unstructured":"Wei, X., Sha, L., Lucey, P., Morgan, S., Sridharan, S.: Large-scale analysis of formations in soccer. In: Proceedings of the 2013 International Conference on Digital Image Computing: Techniques and Applications (DICTA), pp. 1\u20138. Hobart, TAS, Australia (2013). https:\/\/doi.org\/10.1109\/DICTA.2013.6691503","DOI":"10.1109\/DICTA.2013.6691503"},{"key":"44_CR17","doi-asserted-by":"publisher","unstructured":"Bialkowski, A., Lucey, P., Carr, P., Yue, Y., Sridharan, S., Matthews, I.: Large-scale analysis of soccer matches using spatiotemporal tracking data. In: Proceedings of the 2014 IEEE International Conference on Data Mining, pp. 725\u2013730, Shenzhen, China (2014). https:\/\/doi.org\/10.1109\/ICDM.2014.133","DOI":"10.1109\/ICDM.2014.133"},{"key":"44_CR18","unstructured":"Vaswani, A., et al.: Attention is all you need. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, pp. 6000\u20136010. Curran Associates Inc., Long Beach, California, USA (2017)"},{"key":"44_CR19","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. In: Proceeding of the 2019 International Conference on Learning Representations, arXiv, New Orleans, Louisiana, United States (2019)"}],"container-title":["Lecture Notes in Computer Science","Advanced Concepts for Intelligent Vision Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-07343-3_44","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T00:13:09Z","timestamp":1767312789000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-07343-3_44"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032073426","9783032073433"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-07343-3_44","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACIVS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Advanced Concepts for Intelligent Vision Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tokyo","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 July 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 July 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"22","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"acivs2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.acivs2025.com","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}