{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T18:08:20Z","timestamp":1749578900735,"version":"3.37.3"},"reference-count":74,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2022,6,14]],"date-time":"2022-06-14T00:00:00Z","timestamp":1655164800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,6,14]],"date-time":"2022-06-14T00:00:00Z","timestamp":1655164800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2023,1]]},"DOI":"10.1007\/s11042-022-13307-y","type":"journal-article","created":{"date-parts":[[2022,6,14]],"date-time":"2022-06-14T21:02:51Z","timestamp":1655240571000},"page":"1237-1259","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":5,"title":["Learning cricket strokes from spatial and motion visual word sequences"],"prefix":"10.1007","volume":"82","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9417-3169","authenticated-orcid":false,"given":"Arpan","family":"Gupta","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Sakthi Balan","family":"Muthiah","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,6,14]]},"reference":[{"key":"13307_CR1","unstructured":"Bradski G The OpenCV Library. Dr. Dobb\u2019s Journal of Software Tools (2000)"},{"key":"13307_CR2","doi-asserted-by":"crossref","unstructured":"Cai Z, Neher H, Vats K, Clausi D A, Zelek J S (2018) Temporal hockey action recognition via pose and optical flows. arXiv:1812.09533","DOI":"10.1109\/CVPRW.2019.00310"},{"key":"13307_CR3","doi-asserted-by":"crossref","unstructured":"Carreira J , Zisserman A (2017) Quo Vadis, action recognition? A new model and the kinetics dataset. arXiv:1705.07750","DOI":"10.1109\/CVPR.2017.502"},{"key":"13307_CR4","doi-asserted-by":"crossref","unstructured":"Cho K, van Merrienboer B, Bahdanau D, Bengio Y (2014) On the properties of neural machine translation: encoder-decoder approaches. arXiv:1409.1259","DOI":"10.3115\/v1\/W14-4012"},{"key":"13307_CR5","doi-asserted-by":"crossref","unstructured":"Cioppa A, Deliege A, Giancola S, Ghanem B, Droogenbroeck M V, Gade R, Moeslund T B (2020) A context-aware loss function for action spotting in soccer videos. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR42600.2020.01314"},{"key":"13307_CR6","unstructured":"Chung J, G\u00fcl\u00e7ehre C, Cho K, Bengio Y (2014) Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv:1412.3555"},{"key":"13307_CR7","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: 2005 IEEE computer society conference on computer vision and pattern recognition (CVPR\u201905), vol 1, pp 886\u2013893","DOI":"10.1109\/CVPR.2005.177"},{"key":"13307_CR8","doi-asserted-by":"crossref","unstructured":"Deliege A, Cioppa A, Giancola S, Seikavandi M J, Dueholm J V, Nasrollahi K, Ghanem B, Moeslund T B, Van Droogenbroeck M (2021) Soccernet-v2: a dataset and benchmarks for holistic understanding of broadcast soccer videos. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR) workshops, pp 4508\u20134519","DOI":"10.1109\/CVPRW53098.2021.00508"},{"key":"13307_CR9","unstructured":"Digital Gaming Technology (DGT). http:\/\/www.digitalgametechnology.com\/index.php\/products\/electronic-boardshttp:\/\/www.digitalgametechnology.com\/index.php\/products\/electronic-boards. Accessed 15 Sept 2021"},{"issue":"4","key":"13307_CR10","doi-asserted-by":"publisher","first-page":"677","DOI":"10.1109\/TPAMI.2016.2599174","volume":"39","author":"J Donahue","year":"2017","unstructured":"Donahue J, Hendricks L A, Rohrbach M, Venugopalan S, Guadarrama S, Saenko K, Darrell T (2017) Long-term recurrent convolutional networks for visual recognition and description. IEEE Trans Pattern Anal Mach Intell 39(4):677\u2013691. https:\/\/doi.org\/10.1109\/TPAMI.2016.2599174","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"8","key":"13307_CR11","doi-asserted-by":"publisher","first-page":"2911","DOI":"10.1016\/j.patcog.2010.03.009","volume":"43","author":"T D\u2019Orazio","year":"2010","unstructured":"D\u2019Orazio T, Leo M (2010) A review of vision-based systems for soccer video analysis. Pattern Recogn 43(8):2911\u20132926. https:\/\/doi.org\/10.1016\/j.patcog.2010.03.009","journal-title":"Pattern Recogn"},{"key":"13307_CR12","doi-asserted-by":"crossref","unstructured":"Farneb\u00e4ck G (2003) Two-frame motion estimation based on polynomial expansion. In: Proceedings of the 13th Scandinavian conference on image analysis. SCIA\u201903. Springer, Berlin, pp 363\u2013 370","DOI":"10.1007\/3-540-45103-X_50"},{"key":"13307_CR13","doi-asserted-by":"crossref","unstructured":"Faulkner H, Dick A Tenniset: a dataset for dense fine-grained event recognition, localisation and description. In: 2017 International conference on digital image computing: techniques and applications (DICTA). IEEE, pp 1\u20138","DOI":"10.1109\/DICTA.2017.8227494"},{"key":"13307_CR14","doi-asserted-by":"crossref","unstructured":"Foysal M F A, Islam M S, Karim A, Neehal N (2019) Shot-net: a convolutional neural network for classifying different cricket shots. In: Santosh K C, Hegadi R S (eds) Recent trends in image processing and pattern recognition. Springer, Singapore, pp 111\u2013120","DOI":"10.1007\/978-981-13-9181-1_10"},{"key":"13307_CR15","doi-asserted-by":"crossref","unstructured":"Giancola S, Amine M, Dghaily T, Ghanem B (2018) SoccerNet: a scalable dataset for action spotting in soccer videos. arXiv:1804.04527","DOI":"10.1109\/CVPRW.2018.00223"},{"key":"13307_CR16","doi-asserted-by":"crossref","unstructured":"Gourgari S, Goudelis G, Karpouzis K, Kollias S (2013) Thetis: three dimensional tennis shots a human action dataset. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR) workshops","DOI":"10.1109\/CVPRW.2013.102"},{"key":"13307_CR17","unstructured":"GRU module in torch.nn. https:\/\/pytorch.org\/docs\/stable\/generated\/torch.nn.GRU.html#torch.nn.GRU. Accessed 28 Dec 2020"},{"key":"13307_CR18","doi-asserted-by":"crossref","unstructured":"Gupta A, Karel A, Muthiah S B (2021) Cricket stroke recognition using hard and soft assignment based bag of visual words. In: Singh S K, Roy P, Raman B, Nagabhushan P (eds) Computer vision and image processing. Springer, Singapore, pp 231\u2013242","DOI":"10.1007\/978-981-16-1092-9_20"},{"key":"13307_CR19","doi-asserted-by":"crossref","unstructured":"Gupta A, Karel A, Sakthi Balan M (2020) Discovering cricket stroke classes in trimmed telecast videos. In: Nain N, Vipparthi S K, Raman B (eds) Computer vision and image processing. Springer, Singapore, pp 509\u2013520","DOI":"10.1007\/978-981-15-4018-9_45"},{"key":"13307_CR20","doi-asserted-by":"crossref","unstructured":"Gupta A, Muthiah S B (2018) Temporal cricket stroke localization from untrimmed highlight videos. In: Proceedings of the 11th Indian conference on computer vision, graphics and image processing. ICVGIP 2018. Association for Computing Machinery, New York","DOI":"10.1145\/3293353.3293415"},{"key":"13307_CR21","doi-asserted-by":"publisher","first-page":"103944","DOI":"10.1016\/j.imavis.2020.103944","volume":"100","author":"A Gupta","year":"2020","unstructured":"Gupta A, Muthiah S B (2020) Viewpoint constrained and unconstrained Cricket stroke localization from untrimmed videos. Image Vis Comput 100:103944. https:\/\/doi.org\/10.1016\/j.imavis.2020.103944","journal-title":"Image Vis Comput"},{"key":"13307_CR22","doi-asserted-by":"crossref","unstructured":"Harikrishna N, Satheesh S, Sriram S D, Easwarakumar K S (2011) Temporal classification of events in cricket videos. In: 2011 National conference on communications (NCC), pp 1\u20135","DOI":"10.1109\/NCC.2011.5734784"},{"key":"13307_CR23","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Deep residual learning for image recognition. arXiv:1512.03385","DOI":"10.1109\/CVPR.2016.90"},{"key":"13307_CR24","doi-asserted-by":"publisher","first-page":"961","DOI":"10.1109\/CVPR.2015.7298698","volume":"07","author":"FC Heilbron","year":"2015","unstructured":"Heilbron F C, Escorcia V, Ghanem B, Niebles J C (2015) ActivityNet: a large-scale video benchmark for human activity understanding. Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit 07:961\u2013970. https:\/\/doi.org\/10.1109\/CVPR.2015.7298698","journal-title":"Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit"},{"key":"13307_CR25","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1016\/j.imavis.2017.01.010","volume":"60","author":"S Herath","year":"2017","unstructured":"Herath S, Harandi M, Porikli F (2017) Going deeper into action recognition: a survey. Image Vis Comput 60:4\u201321. https:\/\/doi.org\/10.1016\/j.imavis.2017.01.010","journal-title":"Image Vis Comput"},{"key":"13307_CR26","doi-asserted-by":"publisher","unstructured":"Hochreiter S, Schmidhuber J (November 1997) Long short-term memory. Neural Comput 9(8):1735\u20131780. https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"13307_CR27","doi-asserted-by":"crossref","unstructured":"Hui T-W, Tang X, Loy C C (2018) LiteFlowNet: a lightweight convolutional neural network for optical flow estimation. In: Proceedings of IEEE conference on computer vision and pattern recognition (CVPR), pp 8981\u20138989. http:\/\/mmlab.ie.cuhk.edu.hk\/projects\/LiteFlowNet\/","DOI":"10.1109\/CVPR.2018.00936"},{"key":"13307_CR28","doi-asserted-by":"crossref","unstructured":"Ibrahim M S, Muralidharan S, Deng Z, Vahdat A, Mori G (2016) A hierarchical deep temporal model for group activity recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1109\/CVPR.2016.217"},{"key":"13307_CR29","doi-asserted-by":"publisher","unstructured":"Ji S, Xu W, Yang M, Yu K (2013Jan) 3D convolutional neural networks for human action recognition. IEEE Trans Pattern Anal Mach Intell 35 (1):221\u2013231. https:\/\/doi.org\/10.1109\/TPAMI.2012.59","DOI":"10.1109\/TPAMI.2012.59"},{"issue":"1","key":"13307_CR30","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1109\/TPAMI.2010.68","volume":"33","author":"IN Junejo","year":"2011","unstructured":"Junejo I N, Dexter E, Laptev I, P\u00e9rez P (2011) View-independent action recognition from temporal self-similarities. IEEE Trans Pattern Anal Mach Intell 33(1):172\u2013185. https:\/\/doi.org\/10.1109\/TPAMI.2010.68https:\/\/doi.org\/10.1109\/TPAMI.2010.68","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"13307_CR31","doi-asserted-by":"publisher","unstructured":"Karpathy A, Toderici G, Shetty S, Leung T, Sukthankar R, Fei-Fei L (2014) Large-scale video classification with convolutional neural networks. In: 2014 IEEE Conference on computer vision and pattern recognition (CVPR), pp 1725\u20131732. https:\/\/doi.org\/10.1109\/CVPR.2014.223","DOI":"10.1109\/CVPR.2014.223"},{"key":"13307_CR32","unstructured":"Kay W, Carreira J, Simonyan K, Zhang B, Hillier C, Vijayanarasimhan S, Viola F, Green T, Back T, Natsev P, Suleyman M, Zisserman A (2017) The kinetics human action video dataset. arXiv:1705.06950"},{"key":"13307_CR33","unstructured":"Kingma D, Ba J (2014) Adam: a method for stochastic optimization, pp 1\u201315, arXiv:1412.6980"},{"key":"13307_CR34","doi-asserted-by":"crossref","unstructured":"Kolekar M H, Palaniappan K, Sengupta S (2008) Semantic event detection and classification in cricket video sequence. 2008 Sixth Indian conference on computer vision, graphics image processing, pp 382\u2013389","DOI":"10.1109\/ICVGIP.2008.102"},{"issue":"1","key":"13307_CR35","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1007\/s11042-010-0544-9","volume":"54","author":"MH Kolekar","year":"2011","unstructured":"Kolekar M H (2011) Bayesian belief network based broadcast sports video indexing. Multimed Tools Appl 54(1):27\u201354. https:\/\/doi.org\/10.1007\/s11042-010-0544-9","journal-title":"Multimed Tools Appl"},{"issue":"3","key":"13307_CR36","doi-asserted-by":"publisher","first-page":"545","DOI":"10.1007\/s11042-009-0337-1","volume":"47","author":"MH Kolekar","year":"2010","unstructured":"Kolekar M H, Sengupta S (2010) Semantic concept mining in cricket videos for automated highlight generation. Multimed Tools Applic 47(3):545\u2013579. https:\/\/doi.org\/10.1007\/s11042-009-0337-1","journal-title":"Multimed Tools Applic"},{"key":"13307_CR37","unstructured":"Krizhevsky A, Sutskever I, Hinton G E (2012) ImageNet classification with deep convolutional neural networks. In: Pereira F, Burges C J C, Bottou L, Weinberger K Q (eds) Advances in neural information processing systems 25. Curran Associates, Inc., pp 1097\u20131105"},{"key":"13307_CR38","doi-asserted-by":"publisher","unstructured":"Kuehne H, Jhuang H, Garrote E, Poggio T, Serre T (2011) HMDB: A large video database for human motion recognition. Proc IEEE Int Conf Comput Vision :2556\u20132563. https:\/\/doi.org\/10.1109\/ICCV.2011.6126543https:\/\/doi.org\/10.1109\/ICCV.2011.6126543","DOI":"10.1109\/ICCV.2011.6126543 10.1109\/ICCV.2011.6126543"},{"key":"13307_CR39","doi-asserted-by":"crossref","unstructured":"Kulkarni K M, Shenoy S (2021) Table tennis stroke recognition using two-dimensional human pose estimation. arXiv:2104.09907","DOI":"10.1109\/CVPRW53098.2021.00515"},{"key":"13307_CR40","doi-asserted-by":"publisher","unstructured":"Kumar A, Garg J, Mukerjee A (2014) Cricket activity detection. In: International image processing, applications and systems conference, IPAS 2014, pp 1\u20136. https:\/\/doi.org\/10.1109\/IPAS.2014.7043264","DOI":"10.1109\/IPAS.2014.7043264"},{"key":"13307_CR41","unstructured":"Language Modeling with nn.Transformer and TorchText. https:\/\/pytorch.org\/tutorials\/beginner\/transformer_tutorial.html. Accessed 08 Aug 2021"},{"key":"13307_CR42","doi-asserted-by":"crossref","unstructured":"Lazarescu M, Venkatesh S, West G (2002) On the automatic indexing of cricket using camera motion parameters. Proceedings. In: IEEE International Conference on Multimedia and Expo, vol 1. pp 809\u2013812","DOI":"10.1109\/ICME.2002.1035905"},{"issue":"2","key":"13307_CR43","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1016\/j.trit.2016.10.001","volume":"1","author":"H Liu","year":"2016","unstructured":"Liu H, Tang H, Xiao W, Guo Z, Tian L, Gao Y (2016) Sequential bag-of-words model for human action classification. CAAI Trans Intell Technol 1(2):125\u2013136. https:\/\/doi.org\/10.1016\/j.trit.2016.10.001","journal-title":"CAAI Trans Intell Technol"},{"key":"13307_CR44","doi-asserted-by":"crossref","unstructured":"Liu J, Carr P, Collins R T, Liu Y (2013) Tracking sports players with context-conditioned motion models. In: 2013 IEEE Conference on computer vision and pattern recognition, pp 1830\u20131837","DOI":"10.1109\/CVPR.2013.239"},{"issue":"07","key":"13307_CR45","doi-asserted-by":"publisher","first-page":"1704","DOI":"10.1109\/TPAMI.2012.242","volume":"35","author":"W-L Lu","year":"2013","unstructured":"Lu W-L, Ting J, Little J J, Murphy K P (2013) Learning to track and identify players from broadcast sports videos. IEEE Trans Pattern Anal Mach Intell 35(07):1704\u20131716. https:\/\/doi.org\/10.1109\/TPAMI.2012.242https:\/\/doi.org\/10.1109\/TPAMI.2012.242","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"13307_CR46","unstructured":"Lucas B D, Kanade T (1981) An iterative image registration technique with an application to stereo vision. In: Proceedings of the 7th international joint conference on artificial intelligence - volume 2. IJCAI\u201981. Morgan Kaufmann Publishers Inc., San Francisco, pp 674\u2013679"},{"key":"13307_CR47","unstructured":"Moeslund T, Thomas G, Hilton A, Little J, Merler M, Gade R CVSports \u2014 7th International workshop on computer vision in sports (CVsports) at CVPR 2021. http:\/\/www.vap.aau.dk\/cvsports\/. Accessed 15 Sept 2021"},{"key":"13307_CR48","doi-asserted-by":"crossref","unstructured":"Moodley T, van der Haar D (2020) Casrm: cricket automation and stroke recognition model using openpose. In: Duffy V G (ed) Digital human modeling and applications in health, safety, ergonomics and risk management. Posture, motion and health. Springer International Publishing, Cham, pp 67\u201378","DOI":"10.1007\/978-3-030-49904-4_5"},{"key":"13307_CR49","doi-asserted-by":"crossref","unstructured":"Moodley T, van der Haar D (2020) Cricket stroke recognition using computer vision methods. In: Kim K J, Kim H-Y (eds) Information science and applications. Springer, Singapore, pp 171\u2013181","DOI":"10.1007\/978-981-15-1465-4_18"},{"key":"13307_CR50","doi-asserted-by":"crossref","unstructured":"Najafzadeh N, Fotouhi M, Kasaei S (2015) Multiple soccer players tracking. In: 2015 The international symposium on artificial intelligence and signal processing (AISP), pp 310\u2013315","DOI":"10.1109\/AISP.2015.7123503"},{"key":"13307_CR51","unstructured":"Peng X, Wang L, Wang X, Qiao Y (2014) Bag of visual words and fusion methods for action recognition: comprehensive study and good practice. arXiv:1405.4506"},{"key":"13307_CR52","doi-asserted-by":"crossref","unstructured":"Piergiovanni AJ, Ryoo M S (2018) Fine-grained activity recognition in baseball videos. In: The IEEE Conference on computer vision and pattern recognition (CVPR) workshops","DOI":"10.1109\/CVPRW.2018.00226"},{"key":"13307_CR53","doi-asserted-by":"crossref","unstructured":"Pramod Sankar K, Pandey S, Jawahar C V (2006) Text driven temporal segmentation of cricket videos. In: Proceedings of the 5th Indian conference on computer vision, graphics and image processing. ICVGIP\u201906. Springer, Berlin, pp 433\u2013444","DOI":"10.1007\/11949619_39"},{"key":"13307_CR54","doi-asserted-by":"crossref","unstructured":"Quiroga J, Carrillo H, Maldonado E, Ruiz J, Zapata L M (2020) As seen on tv: automatic basketball video production using gaussian-based actionness and game states recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR) workshops","DOI":"10.1109\/CVPRW50498.2020.00455"},{"key":"13307_CR55","doi-asserted-by":"crossref","unstructured":"Ramanathan V, Huang J, Abu-El-Haija S, Gorban A N, Murphy K, Fei-Fei L (2015) Detecting events and key actors in multi-person videos. arXiv:1511.02917","DOI":"10.1109\/CVPR.2016.332"},{"key":"13307_CR56","doi-asserted-by":"crossref","unstructured":"Ravinder M, Venugopal T (2016) Content-based cricket video shot classification using bag-of-visual-features. In: Dash S S, Bhaskar M A, Panigrahi B K, Das S (eds) Artificial intelligence and evolutionary computations in engineering systems. Springer, New Delhi, pp 599\u2013606","DOI":"10.1007\/978-81-322-2656-7_55"},{"issue":"3","key":"13307_CR57","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M, Berg A C, Fei-Fei L (2015) ImageNet large scale visual recognition challenge. Int J Comput Vis 115(3):211\u2013252. https:\/\/doi.org\/10.1007\/s11263-015-0816-y","journal-title":"Int J Comput Vis"},{"key":"13307_CR58","doi-asserted-by":"crossref","unstructured":"Semwal A, Mishra D, Raj V, Sharma J, Mittal A (2018) Cricket shot detection from videos. In: 2018 9th International conference on computing, communication and networking technologies (ICCCNT), pp 1\u20136","DOI":"10.1109\/ICCCNT.2018.8494081"},{"key":"13307_CR59","doi-asserted-by":"crossref","unstructured":"Sharma R A, Sankar K P, Jawahar C V (2015) Fine-grain annotation of cricket videos. arXiv:1511.07607","DOI":"10.1109\/ACPR.2015.7486538"},{"key":"13307_CR60","doi-asserted-by":"publisher","unstructured":"Shih H (2018May) A survey of content-aware video analysis for sports. IEEE Trans Circ Syst Video Technol 28(5):1212\u20131231. https:\/\/doi.org\/10.1109\/TCSVT.2017.2655624","DOI":"10.1109\/TCSVT.2017.2655624"},{"key":"13307_CR61","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. arXiv:1406.2199"},{"key":"13307_CR62","doi-asserted-by":"publisher","unstructured":"Sivic J, Zisserman A (2003) Video Google: a text retrieval approach to object matching in videos. In: Proceedings Ninth IEEE international conference on computer vision, vol 2, pp 1470\u20131477. https:\/\/doi.org\/10.1109\/ICCV.2003.1238663","DOI":"10.1109\/ICCV.2003.1238663"},{"key":"13307_CR63","unstructured":"Soomro K, Zamir A R, Shah M (2012) UCF101: a dataset of 101 human actions classes from videos in the wild. arXiv:1212.0402"},{"key":"13307_CR64","unstructured":"Sutskever I, Vinyals O, Le Q V (2014) Sequence to sequence learning with neural networks. In: Ghahramani Z, Welling M, Cortes C, Lawrence N, Weinberger K Q (eds) Advances in neural information processing systems. https:\/\/proceedings.neurips.cc\/paper\/2014\/file\/a14ac55a4f27472c5d894ec1c3c743d2-Paper.pdf. Accessed 15 Sept 2021, vol 27. Curran Associates, Inc."},{"key":"13307_CR65","doi-asserted-by":"crossref","unstructured":"Teachabarikiti K, Chalidabhongse T H, Thammano A (2010) Players tracking and ball detection for an automatic tennis video annotation. In: 2010 11th International conference on control automation robotics vision, pp 2461\u20132494","DOI":"10.1109\/ICARCV.2010.5707906"},{"key":"13307_CR66","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1016\/j.cviu.2017.04.011","volume":"159","author":"G Thomas","year":"2017","unstructured":"Thomas G, Gade R, Moeslund T B, Carr P, Hilton A (2017) Computer vision for sports: current applications and research topics. Comput Vis Image Underst 159:3\u201318. https:\/\/doi.org\/10.1016\/j.cviu.2017.04.011https:\/\/doi.org\/10.1016\/j.cviu.2017.04.011","journal-title":"Comput Vis Image Underst"},{"key":"13307_CR67","unstructured":"Trace Bot. https:\/\/traceup.com\/soccer\/how-it-works. Accessed 15 Sept 2021"},{"key":"13307_CR68","doi-asserted-by":"crossref","unstructured":"Tran D, Bourdev L, Fergus R, Torresani L, Paluri M (2015) Learning spatiotemporal features with 3d convolutional networks. In: The IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2015.510"},{"issue":"7","key":"13307_CR69","doi-asserted-by":"publisher","first-page":"1271","DOI":"10.1109\/TPAMI.2009.132","volume":"32","author":"JC van Gemert","year":"2010","unstructured":"van Gemert J C, Veenman C J, Smeulders A W M, Geusebroek J (2010) Visual word ambiguity. IEEE Trans Pattern Anal Mach Intell 32 (7):1271\u20131283","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"13307_CR70","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez A N, Kaiser L , Polosukhin I (2017) Attention is all you need. In: Guyon I, Luxburg U V, Bengio S, Wallach H, Fergus R, Vishwanathan S, Garnett R (eds) Advances in neural information processing systems. https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf. Accessed 15 Sept 2021, vol 30. Curran Associates, Inc."},{"key":"13307_CR71","unstructured":"Veo \u2014 Sports Camera. https:\/\/event.veo.co. Accessed 15 Sept 2021"},{"key":"13307_CR72","doi-asserted-by":"publisher","unstructured":"Yan X, Lou Z, Hu S, Ye Y (2020) Multi-task information bottleneck co-clustering for unsupervised cross-view human action categorization. ACM Trans Knowl Discov Data 14(2). https:\/\/doi.org\/10.1145\/3375394","DOI":"10.1145\/3375394"},{"key":"13307_CR73","doi-asserted-by":"crossref","unstructured":"Yao A, Uebersax D, Gall J, Van Gool L (2010) Tracking People in broadcast sports. In: Goesele M, Roth S, Kuijper A, Schiele B, Schindler K (eds) Pattern recognition. Springer, Berlin, pp 151\u2013161","DOI":"10.1007\/978-3-642-15986-2_16"},{"key":"13307_CR74","doi-asserted-by":"crossref","unstructured":"Zhu G, Xu C, Huang Q, Gao W (2006) Automatic multi-player detection and tracking in broadcast sports video using support vector machine and particle filter. In: 2006 IEEE International conference on multimedia and expo, pp 1629\u20131632","DOI":"10.1109\/ICME.2006.262859"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-13307-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-022-13307-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-022-13307-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,29]],"date-time":"2022-12-29T01:29:04Z","timestamp":1672277344000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-022-13307-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,6,14]]},"references-count":74,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2023,1]]}},"alternative-id":["13307"],"URL":"https:\/\/doi.org\/10.1007\/s11042-022-13307-y","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2022,6,14]]},"assertion":[{"value":"9 February 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"26 January 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 May 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 June 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of Interests"}}]}}