{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T04:37:51Z","timestamp":1772771871345,"version":"3.50.1"},"reference-count":58,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2020,11,18]],"date-time":"2020-11-18T00:00:00Z","timestamp":1605657600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,11,18]],"date-time":"2020-11-18T00:00:00Z","timestamp":1605657600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Australian Institute of Sports"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s00521-020-05485-3","type":"journal-article","created":{"date-parts":[[2020,11,18]],"date-time":"2020-11-18T09:09:43Z","timestamp":1605690583000},"page":"7205-7223","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":10,"title":["The detection, tracking, and temporal action localisation of swimmers for automated analysis"],"prefix":"10.1007","volume":"33","author":[{"given":"Ashley","family":"Hall","sequence":"first","affiliation":[]},{"given":"Brandon","family":"Victor","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0302-5775","authenticated-orcid":false,"given":"Zhen","family":"He","sequence":"additional","affiliation":[]},{"given":"Matthias","family":"Langer","sequence":"additional","affiliation":[]},{"given":"Marc","family":"Elipot","sequence":"additional","affiliation":[]},{"given":"Aiden","family":"Nibali","sequence":"additional","affiliation":[]},{"given":"Stuart","family":"Morgan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,11,18]]},"reference":[{"key":"5485_CR1","unstructured":"Coco: Common obejcts in context. http:\/\/cocodataset.org\/. Accessed 22 Nov 2019"},{"key":"5485_CR2","unstructured":"Multiple object tracking benchmark. https:\/\/motchallenge.net\/"},{"issue":"1","key":"5485_CR3","doi-asserted-by":"publisher","first-page":"246309","DOI":"10.1155\/2008\/246309","volume":"2008","author":"K Bernardin","year":"2008","unstructured":"Bernardin K, Stiefelhagen R (2008) Evaluating multiple object tracking performance: the CLEAR MOT metrics. EURASIP J Image Video Process 2008(1):246309. https:\/\/doi.org\/10.1155\/2008\/246309","journal-title":"EURASIP J Image Video Process"},{"key":"5485_CR4","doi-asserted-by":"crossref","unstructured":"Bewley A, Ge Z, Ott L, Ramos F, Upcroft B (2016) Simple online and realtime tracking. In: 2016 IEEE international conference on image processing (ICIP), pp 3464\u20133468. IEEE","DOI":"10.1109\/ICIP.2016.7533003"},{"key":"5485_CR5","doi-asserted-by":"crossref","unstructured":"Buch S, Escorcia V, Ghanem B, Fei-Fei L, Niebles JC (2017) End-to-end, single-stream temporal action detection in untrimmed videos. In: BMVC, vol 2, p 7","DOI":"10.5244\/C.31.93"},{"key":"5485_CR6","doi-asserted-by":"crossref","unstructured":"Caba Heilbron F, Carlos Niebles J, Ghanem B (2016) Fast temporal activity proposals for efficient detection of human actions in untrimmed videos. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1914\u20131923","DOI":"10.1109\/CVPR.2016.211"},{"key":"5485_CR7","doi-asserted-by":"crossref","unstructured":"Chao YW, Vijayanarasimhan S, Seybold B, Ross DA, Deng J, Sukthankar R (2018) Rethinking the faster r-cnn architecture for temporal action localization. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1130\u20131139","DOI":"10.1109\/CVPR.2018.00124"},{"key":"5485_CR8","doi-asserted-by":"crossref","unstructured":"Dai X, Singh B, Zhang G, Davis LS, Qiu\u00a0Chen Y (2017) Temporal context network for activity localization in videos. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 5793\u20135802","DOI":"10.1109\/ICCV.2017.610"},{"key":"5485_CR9","doi-asserted-by":"crossref","unstructured":"Duan K, Bai S, Xie L, Qi H, Huang Q, Tian Q (2019) Centernet: Keypoint triplets for object detection. In: Proceedings of international conference in computer vision (ICCV)","DOI":"10.1109\/ICCV.2019.00667"},{"key":"5485_CR10","doi-asserted-by":"crossref","unstructured":"Einfalt M, Zecha D, Lienhart R (2018) Activity-conditioned continuous human pose estimation for performance analysis of athletes using the example of swimming. In: 2018 IEEE winter conference on applications of computer vision (WACV), pp 446\u2013455. IEEE","DOI":"10.1109\/WACV.2018.00055"},{"key":"5485_CR11","unstructured":"Everingham M, Van\u00a0Gool L, Williams CKI, Winn J, Zisserman A (2012) The PASCAL visual object classes challenge (VOC2012) results. http:\/\/www.pascal-network.org\/challenges\/VOC\/voc2012\/workshop\/index.html"},{"key":"5485_CR12","doi-asserted-by":"crossref","unstructured":"Fani H, Mirlohi A, Hosseini H, Herperst R (2018) Swim stroke analytic: front crawl pulling pose classification. In: 2018 25th IEEE international conference on image processing (ICIP), pp 4068\u20134072. IEEE","DOI":"10.1109\/ICIP.2018.8451756"},{"key":"5485_CR13","doi-asserted-by":"crossref","unstructured":"Gao J, Yang Z, Chen K, Sun C, Nevatia R (2017) Turn tap: temporal unit regression network for temporal action proposals. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 3628\u20133636","DOI":"10.1109\/ICCV.2017.392"},{"key":"5485_CR14","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"key":"5485_CR15","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition(CVPR), pp 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"issue":"4","key":"5485_CR16","doi-asserted-by":"publisher","first-page":"219","DOI":"10.2299\/jsp.22.219","volume":"22","author":"K Hakozaki","year":"2018","unstructured":"Hakozaki K, Kato N, Tanabiki M, Furuyama J, Sato Y, Aoki Y (2018) Swimmer\u2019s stroke estimation using cnn and multilstm. J Sig Process 22(4):219\u2013222","journal-title":"J Sig Process"},{"key":"5485_CR17","doi-asserted-by":"crossref","unstructured":"Hammad M, P\u0142awiak P, Wang K, Acharya UR (2020) Resnet-attention model for human authentication using ECG signals. Expert Syst p e12547","DOI":"10.1111\/exsy.12547"},{"key":"5485_CR18","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask r-cnn. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"key":"5485_CR19","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"5485_CR20","doi-asserted-by":"crossref","unstructured":"Huang Y, Dai Q, Lu Y (2019) Decoupling localization and classification in single shot temporal action detection. In: IEEE international conference on multimedia and expo (ICME)","DOI":"10.1109\/ICME.2019.00224"},{"key":"5485_CR21","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. arXiv preprint arXiv:1502.03167"},{"issue":"1\u20132","key":"5485_CR22","doi-asserted-by":"publisher","first-page":"83","DOI":"10.1002\/nav.3800020109","volume":"2","author":"HW Kuhn","year":"1955","unstructured":"Kuhn HW (1955) The hungarian method for the assignment problem. Nav Res Log Quart 2(1\u20132):83\u201397","journal-title":"Nav Res Log Quart"},{"key":"5485_CR23","doi-asserted-by":"crossref","unstructured":"Law H, Deng J (2018) Cornernet: detecting objects as paired keypoints. In: Proceedings of the European conference on computer vision (ECCV), pp 734\u2013750","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"5485_CR24","doi-asserted-by":"crossref","unstructured":"Leal-Taix\u00e9 L, Fenzi M, Kuznetsova A, Rosenhahn B, Savarese S (2014) Learning an image-based motion context for multiple people tracking. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3542\u20133549","DOI":"10.1109\/CVPR.2014.453"},{"key":"5485_CR25","unstructured":"Leal-Taix\u00e9 L, Milan A, Reid I, Roth S, Schindler K (2015) Motchallenge 2015: Towards a benchmark for multi-target tracking. arXiv preprint arXiv:1504.01942"},{"key":"5485_CR26","doi-asserted-by":"crossref","unstructured":"Leal-Taix\u00e9 L, Pons-Moll G, Rosenhahn B (2011) Everybody needs somebody: modeling social and grouping behavior on a linear programming multiple people tracker. In: 2011 IEEE international conference on computer vision workshops (ICCV workshops), pp 120\u2013127. IEEE","DOI":"10.1109\/ICCVW.2011.6130233"},{"key":"5485_CR27","doi-asserted-by":"crossref","unstructured":"Lin T, Zhao X, Shou Z (2017) Single shot temporal action detection. In: Proceedings of the 25th ACM international conference on multimedia, pp 988\u2013996. ACM","DOI":"10.1145\/3123266.3123343"},{"key":"5485_CR28","doi-asserted-by":"crossref","unstructured":"Lin T, Zhao X, Su H, Wang C, Yang M (2018) BSN: boundary sensitive network for temporal action proposal generation. In: Proceedings of the European conference on computer vision (ECCV), pp 3\u201319","DOI":"10.1007\/978-3-030-01225-0_1"},{"key":"5485_CR29","doi-asserted-by":"crossref","unstructured":"Lin TY, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition(CVPR), pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"5485_CR30","doi-asserted-by":"crossref","unstructured":"Lin TY, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 2980\u20132988","DOI":"10.1109\/ICCV.2017.324"},{"key":"5485_CR31","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu CY, Berg AC (2016) SSD: Single shot multibox detector. In: European conference on computer vision, pp 21\u201337. Springer","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"5485_CR32","doi-asserted-by":"crossref","unstructured":"Nibali A, He Z, Morgan S, Greenwood D (2017) Extraction and classification of diving clips from continuous video footage. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp 38\u201348","DOI":"10.1109\/CVPRW.2017.18"},{"key":"5485_CR33","unstructured":"Nibali A, He Z, Morgan S, Prendergast L (2018) Numerical coordinate regression with convolutional neural networks. arXiv preprint arXiv:1801.07372"},{"key":"5485_CR34","unstructured":"Paszke A, Gross S, Chintala S, Chanan G, Yang E, DeVito Z, Lin Z, Desmaison A, Antiga L, Lerer A (2017) Automatic differentiation in pytorch"},{"key":"5485_CR35","doi-asserted-by":"crossref","unstructured":"Pirsiavash H, Ramanan D, Fowlkes CC (2011) Globally-optimal greedy algorithms for tracking a variable number of objects. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 1201\u20131208. IEEE","DOI":"10.1109\/CVPR.2011.5995604"},{"key":"5485_CR36","doi-asserted-by":"publisher","first-page":"105740","DOI":"10.1016\/j.asoc.2019.105740","volume":"84","author":"P P\u0142awiak","year":"2019","unstructured":"P\u0142awiak P, Abdar M, Acharya UR (2019) Application of new deep genetic cascade ensemble of svm classifiers to predict the australian credit scoring. Appl Soft Comput 84:105740","journal-title":"Appl Soft Comput"},{"key":"5485_CR37","doi-asserted-by":"publisher","first-page":"401","DOI":"10.1016\/j.ins.2019.12.045","volume":"516","author":"P P\u0142awiak","year":"2020","unstructured":"P\u0142awiak P, Abdar M, P\u0142awiak J, Makarenkov V, Acharya UR (2020) Dghnl: a new deep genetic hierarchical network of learners for prediction of credit scoring. Inf Sci 516:401\u2013418","journal-title":"Inf Sci"},{"key":"5485_CR38","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition(CVPR), pp 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"key":"5485_CR39","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. In: Proceedings of the IEEE conference on computer vision and pattern recognition(CVPR), pp 7263\u20137271","DOI":"10.1109\/CVPR.2017.690"},{"key":"5485_CR40","unstructured":"Ren S, He K, Girshick R, Sun J (2015) Faster r-cnn: towards real-time object detection with region proposal networks. In: Advances in neural information processing systems, pp 91\u201399"},{"key":"5485_CR41","doi-asserted-by":"crossref","unstructured":"Ristani E, Solera F, Zou R, Cucchiara R, Tomasi C (2016) Performance measures and a data set for multi-target, multi-camera tracking. In: Computer vision\u2014ECCV 2016 workshops, pp 17\u201335. Springer International Publishing, Cham","DOI":"10.1007\/978-3-319-48881-3_2"},{"key":"5485_CR42","doi-asserted-by":"crossref","unstructured":"Sadeghian A, Alahi A, Savarese S (2017) Tracking the untrackable: Learning to track multiple cues with long-term dependencies. In: Proceedings of the IEEE international conference on computer vision (CVPR), pp 300\u2013311","DOI":"10.1109\/ICCV.2017.41"},{"key":"5485_CR43","doi-asserted-by":"crossref","unstructured":"Shou Z, Chan J, Zareian A, Miyazawa K, Chang SF (2017) Cdc: Convolutional-de-convolutional networks for precise temporal action localization in untrimmed videos. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 5734\u20135743","DOI":"10.1109\/CVPR.2017.155"},{"key":"5485_CR44","doi-asserted-by":"crossref","unstructured":"Shou Z, Wang D, Chang SF (2016) Temporal action localization in untrimmed videos via multi-stage cnns. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1049\u20131058","DOI":"10.1109\/CVPR.2016.119"},{"key":"5485_CR45","doi-asserted-by":"crossref","unstructured":"Tian Z, Shen C, Chen H, He T (2019) FCOS: fully convolutional one-stage object detection. In: Proceedings of international conference in computer vision (ICCV)","DOI":"10.1109\/ICCV.2019.00972"},{"key":"5485_CR46","unstructured":"Tompson JJ, Jain A, LeCun Y, Bregler C (2014) Joint training of a convolutional network and a graphical model for human pose estimation. In: Advances in neural information processing systems, pp 1799\u20131807"},{"key":"5485_CR47","doi-asserted-by":"crossref","unstructured":"Tsumita T, Shishido H, Kitahara I, Kameda Y (2019) Swimmer position estimation by lane rectification. In: International workshop on advanced image technology (IWAIT) 2019, vol 11049, p 110490E. International Society for Optics and Photonics","DOI":"10.1117\/12.2521242"},{"issue":"3","key":"5485_CR48","doi-asserted-by":"publisher","first-page":"2119","DOI":"10.1007\/s11227-020-03205-1","volume":"76","author":"T Tuncer","year":"2020","unstructured":"Tuncer T, Ertam F, Dogan S, Aydemir E, P\u0142awiak P (2020) Ensemble residual network-based gender and activity recognition method with signals. J Supercomput 76(3):2119\u20132138","journal-title":"J Supercomput"},{"key":"5485_CR49","doi-asserted-by":"crossref","unstructured":"Victor B, He Z, Morgan S, Miniutti D (2017) Continuous video to simple signals for swimming stroke detection with convolutional neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp 66\u201375","DOI":"10.1109\/CVPRW.2017.21"},{"key":"5485_CR50","doi-asserted-by":"crossref","unstructured":"Wang M, Liu Y, Huang Z (2017) Large margin object tracking with circulant feature maps. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4021\u20134029","DOI":"10.1109\/CVPR.2017.510"},{"key":"5485_CR51","doi-asserted-by":"crossref","unstructured":"Wojke N, Bewley A, Paulus D (2017) Simple online and realtime tracking with a deep association metric. In: 2017 IEEE international conference on image processing (ICIP), pp 3645\u20133649. IEEE","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"5485_CR52","doi-asserted-by":"crossref","unstructured":"Wu Y, He K (2018) Group normalization. In: Proceedings of the European conference on computer vision (ECCV), pp 3\u201319","DOI":"10.1007\/978-3-030-01261-8_1"},{"key":"5485_CR53","doi-asserted-by":"crossref","unstructured":"Xu H, Das A, Saenko K (2017) R-c3d: region convolutional 3d network for temporal activity detection. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 5783\u20135792","DOI":"10.1109\/ICCV.2017.617"},{"key":"5485_CR54","doi-asserted-by":"crossref","unstructured":"Zecha D, Einfalt M, Lienhart R (2019) Refining joint locations for human pose tracking in sports videos. In: Proceedings of the IEEE conference on computer vision and pattern recognition workshops, pp 0\u20130","DOI":"10.1109\/CVPRW.2019.00308"},{"key":"5485_CR55","unstructured":"Zhang L, Li Y, Nevatia R (2008) Global data association for multi-object tracking using network flows. In: 2008 IEEE conference on computer vision and pattern recognition (CVPR), pp 1\u20138. IEEE"},{"key":"5485_CR56","doi-asserted-by":"crossref","unstructured":"Zhao Y, Xiong Y, Wang L, Wu Z, Tang X, Lin D (2017) Temporal action detection with structured segment networks. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 2914\u20132923","DOI":"10.1109\/ICCV.2017.317"},{"key":"5485_CR57","doi-asserted-by":"crossref","unstructured":"Zhu J, Yang H, Liu N, Kim M, Zhang W, Yang MH (2018) Online multi-object tracking with dual matching attention networks. In: Proceedings of the European conference on computer vision (ECCV), pp 366\u2013382","DOI":"10.1007\/978-3-030-01228-1_23"},{"key":"5485_CR58","volume-title":"Multiple view geometry in computer vision","author":"RHA Zisserman","year":"2004","unstructured":"Zisserman RHA (2004) Multiple view geometry in computer vision. Cambridge University Press, Cambridge"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-020-05485-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-020-05485-3\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-020-05485-3.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,5,30]],"date-time":"2021-05-30T19:36:20Z","timestamp":1622403380000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-020-05485-3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,11,18]]},"references-count":58,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["5485"],"URL":"https:\/\/doi.org\/10.1007\/s00521-020-05485-3","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,11,18]]},"assertion":[{"value":"17 December 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 October 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 November 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"This work was funded by a competitive innovation fund from the Australian Institute of Sports. The Project is titled \u201cA software system for automated annotation of swimming videos using deep learning\u201d. There are no other conflicts to declare.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}