{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,31]],"date-time":"2025-10-31T08:01:40Z","timestamp":1761897700506,"version":"3.37.3"},"reference-count":79,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2022,3,7]],"date-time":"2022-03-07T00:00:00Z","timestamp":1646611200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,3,7]],"date-time":"2022-03-07T00:00:00Z","timestamp":1646611200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100003093","name":"Ministry of Higher Education Malaysia","doi-asserted-by":"crossref","award":["FRGS\/1\/2019\/ICT02\/USM\/02\/1"],"award-info":[{"award-number":["FRGS\/1\/2019\/ICT02\/USM\/02\/1"]}],"id":[{"id":"10.13039\/501100003093","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s00521-022-07102-x","type":"journal-article","created":{"date-parts":[[2022,3,7]],"date-time":"2022-03-07T03:02:45Z","timestamp":1646622165000},"page":"8479-8499","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":11,"title":["Weakly-supervised temporal action localization: a survey"],"prefix":"10.1007","volume":"34","author":[{"given":"AbdulRahman","family":"Baraka","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3300-3270","authenticated-orcid":false,"given":"Mohd Halim","family":"Mohd Noor","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,3,7]]},"reference":[{"key":"7102_CR1","unstructured":"Lin X, Shou Z, Chang S-F (2019) Towards train-test consistency for semi-supervised temporal action localization, [Online]. Available: http:\/\/arxiv.org\/abs\/1910.11285"},{"key":"7102_CR2","doi-asserted-by":"crossref","unstructured":"Ma F et al (2020) SF-Net: single-frame supervision for temporal action localization, [Online]. Available: http:\/\/arxiv.org\/abs\/2003.06845","DOI":"10.1007\/978-3-030-58548-8_25"},{"key":"7102_CR3","unstructured":"Ding X, Wang N, Gao X, Li J, Wang X, and Liu T (2020) Weakly supervised temporal action localization with segment-level labels, 1(c), [Online]. Available: http:\/\/arxiv.org\/abs\/2007.01598"},{"key":"7102_CR4","doi-asserted-by":"publisher","unstructured":"Sun C, Shetty S, Sukthankar R, and Nevatia R (2015) Temporal localization of fine-grained actions in videos by domain transfer from web images. In: MM 2015 - Proc. 2015 ACM Multimed. Conf., pp. 371\u2013380. https:\/\/doi.org\/10.1145\/2733373.2806226","DOI":"10.1145\/2733373.2806226"},{"key":"7102_CR5","doi-asserted-by":"publisher","unstructured":"Park J, Lee J, Jeon S, Kim S, and Sohn K (2019) Graph regularization network with semantic affinity for weakly-supervised temporal action localization. In: Proceedings - international conference on image processing, ICIP, 2019:3701\u20133705. https:\/\/doi.org\/10.1109\/ICIP.2019.8803589","DOI":"10.1109\/ICIP.2019.8803589"},{"key":"7102_CR6","doi-asserted-by":"publisher","unstructured":"Nguyen P, Han B, Liu T, and Prasad G (2018) Weakly supervised action localization by sparse temporal pooling network. In: Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit, pp. 6752\u20136761. https:\/\/doi.org\/10.1109\/CVPR.2018.00706","DOI":"10.1109\/CVPR.2018.00706"},{"key":"7102_CR7","doi-asserted-by":"publisher","unstructured":"Narayan S, Cholakkal H, Khan FS, and Shao L (2019) 3C-Net: category count and center loss for weakly-supervised action localization. Proc IEEE Int Conf Comput Vis 2019: 8678\u20138686. https:\/\/doi.org\/10.1109\/ICCV.2019.00877","DOI":"10.1109\/ICCV.2019.00877"},{"key":"7102_CR8","unstructured":"Wang C, Cai H, Zou Y, and Xiong Y (2021) RGB stream is enough for temporal action detection, [Online]. Available: http:\/\/arxiv.org\/abs\/2107.04362"},{"key":"7102_CR9","doi-asserted-by":"crossref","unstructured":"Alwassel H, Giancola S, and Ghanem B (2020) TSP: temporally-sensitive pretraining of video encoders for localization tasks, [Online]. Available: http:\/\/arxiv.org\/abs\/2011.11479","DOI":"10.1109\/ICCVW54120.2021.00356"},{"key":"7102_CR10","unstructured":"Nawhal M and Mori G (2021) Activity graph transformer for temporal action localization, [Online]. Available: http:\/\/arxiv.org\/abs\/2101.08540"},{"key":"7102_CR11","unstructured":"Alwassel H, Pardo A, Heilbron FC, Thabet A, and Ghanem B (2019) RefineLoc: iterative refinement for weakly-supervised action localization, [Online]. Available: http:\/\/arxiv.org\/abs\/1904.00227"},{"key":"7102_CR12","doi-asserted-by":"publisher","unstructured":"Bojanowski P et al (2014) Weakly supervised action labeling in videos under ordering constraints. Lect. Notes Comput. Sci. (including Subser. Lect. Notes Artif. Intell. Lect. Notes Bioinformatics), vol. 8693 LNCS, no. Part 5, pp. 628\u2013643. https:\/\/doi.org\/10.1007\/978-3-319-10602-1_41","DOI":"10.1007\/978-3-319-10602-1_41"},{"key":"7102_CR13","doi-asserted-by":"publisher","unstructured":"Huang DA, Fei-Fei L, and Niebles JC (2016) Connectionist temporal modeling for weakly supervised action labelling. Lect. Notes Comput. Sci. (including Subser. Lect. Notes Artif. Intell. Lect. Notes Bioinformatics), vol. 9908 LNCS, pp. 137\u2013153. https:\/\/doi.org\/10.1007\/978-3-319-46493-0_9","DOI":"10.1007\/978-3-319-46493-0_9"},{"key":"7102_CR14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00157","author":"H Yang","year":"2018","unstructured":"Yang H, He X, Porikli F (2018) One-shot action localization by learning sequence matching network. Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit. https:\/\/doi.org\/10.1109\/CVPR.2018.00157","journal-title":"Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit"},{"key":"7102_CR15","first-page":"942","volume":"2018","author":"G Ch\u00e9ron","year":"2018","unstructured":"Ch\u00e9ron G, Alayrac JB, Laptev I, Schmid C (2018) A flexible model for training action localization with varying levels of supervision. Adv Neural Inf Process Syst 2018:942\u2013953","journal-title":"Adv Neural Inf Process Syst"},{"key":"7102_CR16","doi-asserted-by":"publisher","first-page":"70477","DOI":"10.1109\/ACCESS.2020.2986861","volume":"8","author":"H Xia","year":"2020","unstructured":"Xia H, Zhan Y (2020) A survey on temporal action localization. IEEE Access 8:70477\u201370487. https:\/\/doi.org\/10.1109\/ACCESS.2020.2986861","journal-title":"IEEE Access"},{"issue":"1","key":"7102_CR17","doi-asserted-by":"publisher","first-page":"44","DOI":"10.1093\/nsr\/nwx106","volume":"5","author":"ZH Zhou","year":"2018","unstructured":"Zhou ZH (2018) A brief introduction to weakly supervised learning. Natl Sci Rev 5(1):44\u201353. https:\/\/doi.org\/10.1093\/nsr\/nwx106","journal-title":"Natl Sci Rev"},{"key":"7102_CR18","doi-asserted-by":"publisher","unstructured":"Kolesnikov A and Lampert CH (2016) Seed, expand and constrain: three principles for weakly-supervised image segmentation. In: Lecture notes in computer science (including subseries Lecture notes in artificial intelligence and lecture notes in bioinformatics), 9908 LNCS, pp. 695\u2013711. https:\/\/doi.org\/10.1007\/978-3-319-46493-0_42","DOI":"10.1007\/978-3-319-46493-0_42"},{"key":"7102_CR19","doi-asserted-by":"publisher","first-page":"329","DOI":"10.1016\/j.patcog.2017.10.009","volume":"77","author":"MA Carbonneau","year":"2018","unstructured":"Carbonneau MA, Cheplygina V, Granger E, Gagnon G (2018) Multiple instance learning: a survey of problem characteristics and applications. Pattern Recognit 77:329\u2013353. https:\/\/doi.org\/10.1016\/j.patcog.2017.10.009","journal-title":"Pattern Recognit"},{"issue":"2","key":"7102_CR20","doi-asserted-by":"publisher","first-page":"313","DOI":"10.1007\/s10618-015-0416-z","volume":"30","author":"G Vanwinckelen","year":"2016","unstructured":"Vanwinckelen G, Tragante do VO, Fierens D, Blockeel H (2016) Instance-level accuracy versus bag-level accuracy in multi-instance learning. Data Min Knowl Discov 30(2):313\u2013341. https:\/\/doi.org\/10.1007\/s10618-015-0416-z","journal-title":"Data Min Knowl Discov"},{"key":"7102_CR21","doi-asserted-by":"publisher","unstructured":"Wang L, Xiong Y, Lin D, and Van Gool L (2017) UntrimmedNets for weakly supervised action recognition and detection. In: Proc - 30th IEEE Conf Comput Vis Pattern Recognition, CVPR 2017, 2017: 6402\u20136411. https:\/\/doi.org\/10.1109\/CVPR.2017.678.","DOI":"10.1109\/CVPR.2017.678"},{"key":"7102_CR22","doi-asserted-by":"publisher","first-page":"9070","DOI":"10.1609\/aaai.v33i01.33019070","volume":"33","author":"Y Xu","year":"2019","unstructured":"Xu Y et al (2019) Segregated temporal assembly recurrent networks for weakly supervised multiple action detection. Proc AAAI Conf Artif Intell 33:9070\u20139078. https:\/\/doi.org\/10.1609\/aaai.v33i01.33019070","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"7102_CR23","doi-asserted-by":"publisher","unstructured":"Lee P, Uh Y, and Byun H (2019) Background suppression network for weakly-supervised temporal action localization. https:\/\/doi.org\/10.1609\/aaai.v34i07.6793","DOI":"10.1609\/aaai.v34i07.6793"},{"key":"7102_CR24","doi-asserted-by":"publisher","unstructured":"Paul S, Roy S, and Roy-Chowdhury AK (2018) W-TALC: weakly-supervised temporal activity localization and classification. Lect Notes Comput Sci (including Subser Lect Notes Artif Intell Lect Notes Bioinformatics), 11208 LNCS, pp. 588\u2013607. https:\/\/doi.org\/10.1007\/978-3-030-01225-0_35","DOI":"10.1007\/978-3-030-01225-0_35"},{"key":"7102_CR25","unstructured":"Lee P, Wang J, Lu Y, and Byun H (2020) Background modeling via uncertainty estimation for weakly-supervised action localization. pp. 1\u201312, [Online]. Available: http:\/\/arxiv.org\/abs\/2006.07006"},{"key":"7102_CR26","doi-asserted-by":"publisher","unstructured":"Rashid M, Kjellstrom H, and Lee YJ (2020) Action graphs: weakly-supervised action localization with graph convolution networks. In: Proceedings - 2020 IEEE winter conference on applications of computer vision, WACV 2020, pp. 604\u2013613. https:\/\/doi.org\/10.1109\/WACV45572.2020.9093404","DOI":"10.1109\/WACV45572.2020.9093404"},{"key":"7102_CR27","doi-asserted-by":"publisher","unstructured":"Shi B, Dai Q, Mu Y, and Wang J (2020) Weakly-supervised action localization by generative attention modelling. pp. 1006\u20131016. https:\/\/doi.org\/10.1109\/cvpr42600.2020.00109","DOI":"10.1109\/cvpr42600.2020.00109"},{"key":"7102_CR28","doi-asserted-by":"publisher","unstructured":"Schindler K and Van Gool L (2008) Action snippets: How many frames does human action recognition require?. In: 26th IEEE Conf Comput Vis Pattern Recognition, CVPR. https:\/\/doi.org\/10.1109\/CVPR.2008.4587730","DOI":"10.1109\/CVPR.2008.4587730"},{"key":"7102_CR29","doi-asserted-by":"publisher","unstructured":"Liu D, Jiang T, and Wang Y (2019) Completeness modeling and context separation for weakly supervised temporal action localization. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, 2019:1298\u20131307. https:\/\/doi.org\/10.1109\/CVPR.2019.00139","DOI":"10.1109\/CVPR.2019.00139"},{"key":"7102_CR30","doi-asserted-by":"publisher","unstructured":"Singh KK and Lee YJ (2017) Hide-and-seek: forcing a network to be meticulous for weakly-supervised object and action localization. In: Proc IEEE Int Conf Comput Vis, 2017: 3544\u20133553. https:\/\/doi.org\/10.1109\/ICCV.2017.381","DOI":"10.1109\/ICCV.2017.381"},{"key":"7102_CR31","doi-asserted-by":"publisher","unstructured":"Shou Z, Gao H, Zhang L, Miyazawa K, and Chang SF (2018) AutoLoc: weakly-supervised temporal action localization in untrimmed videos. Lect Notes Comput Sci (including Subser Lect Notes Artif Intell Lect Notes Bioinformatics), vol. 11220 LNCS, pp. 162\u2013179. https:\/\/doi.org\/10.1007\/978-3-030-01270-0_10","DOI":"10.1007\/978-3-030-01270-0_10"},{"key":"7102_CR32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00400","author":"Z Liu","year":"2019","unstructured":"Liu Z et al (2019) Weakly supervised temporal action localization through contrast based evaluation networks. Proc IEEE Int Conf Comput Vis. https:\/\/doi.org\/10.1109\/ICCV.2019.00400","journal-title":"Proc IEEE Int Conf Comput Vis"},{"issue":"12","key":"7102_CR33","doi-asserted-by":"publisher","first-page":"5797","DOI":"10.1109\/TIP.2019.2922108","volume":"28","author":"R Zeng","year":"2019","unstructured":"Zeng R, Gan C, Chen P, Huang W, Wu Q, Tan M (2019) Breaking winner-takes-all: iterative-winners-out networks for weakly supervised temporal action localization. IEEE Trans Image Process 28(12):5797\u20135808. https:\/\/doi.org\/10.1109\/TIP.2019.2922108","journal-title":"IEEE Trans Image Process"},{"key":"7102_CR34","doi-asserted-by":"publisher","unstructured":"Su H, Zhao X, and Lin T (2019) Cascaded pyramid mining network for weakly supervised temporal action localization. Lect Notes Comput Sci (including Subser Lect Notes Artif Intell Lect Notes Bioinformatics), vol. 11362 LNCS, pp. 558\u2013574. https:\/\/doi.org\/10.1007\/978-3-030-20890-5_36","DOI":"10.1007\/978-3-030-20890-5_36"},{"key":"7102_CR35","doi-asserted-by":"publisher","unstructured":"Su H, Zhao X, Lin T, and Fei H (2018) Weakly supervised temporal action detection with shot-based temporal pooling network. Lect Notes Comput Sci (including Subser Lect Notes Artif Intell Lect Notes Bioinformatics), 11304 LNCS, pp. 426\u2013436. https:\/\/doi.org\/10.1007\/978-3-030-04212-7_37","DOI":"10.1007\/978-3-030-04212-7_37"},{"issue":"3","key":"7102_CR36","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O et al (2015) ImageNet large scale visual recognition challenge. Int J Comput Vis 115(3):211\u2013252. https:\/\/doi.org\/10.1007\/s11263-015-0816-y","journal-title":"Int J Comput Vis"},{"key":"7102_CR37","unstructured":"Kay W et al (2017) The kinetics human action video dataset, [Online]. Available: http:\/\/arxiv.org\/abs\/1705.06950"},{"key":"7102_CR38","doi-asserted-by":"publisher","unstructured":"Zach C, Pock T, and Bischof H (2007) A duality based approach for realtime TV-L1 optical flow. Lect Notes Comput Sci (including Subser Lect Notes Artif Intell Lect Notes Bioinformatics), vol. 4713 LNCS, pp. 214\u2013223. https:\/\/doi.org\/10.1007\/978-3-540-74936-3_22","DOI":"10.1007\/978-3-540-74936-3_22"},{"key":"7102_CR39","unstructured":"Soomro K, Zamir AR, and Shah M (2012) UCF101: a dataset of 101 human actions classes from videos in the wild, [Online]. Available: http:\/\/arxiv.org\/abs\/1212.0402"},{"key":"7102_CR40","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543","author":"H Kuehne","year":"2011","unstructured":"Kuehne H, Jhuang H, Garrote E, Poggio T, Serre T (2011) HMDB: a large video database for human motion recognition. Proc IEEE Int Conf Comput Vis. https:\/\/doi.org\/10.1109\/ICCV.2011.6126543","journal-title":"Proc IEEE Int Conf Comput Vis"},{"issue":"January","key":"7102_CR41","first-page":"568","volume":"1","author":"K Simonyan","year":"2014","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. Adv Neural Inf Process Syst 1(January):568\u2013576","journal-title":"Adv Neural Inf Process Syst"},{"key":"7102_CR42","doi-asserted-by":"publisher","unstructured":"Wang L et al (2016) Temporal segment networks: Towards good practices for deep action recognition. Lect Notes Comput Sci (including Subser. Lect Notes Artif Intell Lect Notes Bioinformatics), vol. 9912 LNCS, pp. 20\u201336. https:\/\/doi.org\/10.1007\/978-3-319-46484-8_2","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"7102_CR43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223","author":"A Karpathy","year":"2014","unstructured":"Karpathy A, Toderici G, Shetty S, Leung T, Sukthankar R, Li FF (2014) Large-scale video classification with convolutional neural networks. Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit. https:\/\/doi.org\/10.1109\/CVPR.2014.223","journal-title":"Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit"},{"key":"7102_CR44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.213","author":"C Feichtenhofer","year":"2016","unstructured":"Feichtenhofer C, Pinz A, Zisserman A (2016) Convolutional two-stream network fusion for video action recognition. Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit. https:\/\/doi.org\/10.1109\/CVPR.2016.213","journal-title":"Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit"},{"key":"7102_CR45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.610","author":"X Dai","year":"2017","unstructured":"Dai X, Singh B, Zhang G, Davis LS, Chen YQ (2017) Temporal context network for activity localization in videos. Proc IEEE Int Conf Comput Vis. https:\/\/doi.org\/10.1109\/ICCV.2017.610","journal-title":"Proc IEEE Int Conf Comput Vis"},{"key":"7102_CR46","doi-asserted-by":"publisher","unstructured":"Zhong JX, Li N, Kong W, Zhang T, Li TH, and Li G (2018) Step-by-step erasion, one-by-one collection: a weakly supervised temporal action detector. In: MM 2018 - Proceedings of the 2018 ACM multimedia conference, no. 2014, pp. 35\u201344. https:\/\/doi.org\/10.1145\/3240508.3240511","DOI":"10.1145\/3240508.3240511"},{"key":"7102_CR47","doi-asserted-by":"crossref","unstructured":"Huang L, Huang Y, Ouyang W, and Wang L (2020) Relational prototypical network for weakly supervised temporal action localization. Aaai","DOI":"10.1109\/ICCV48922.2021.00790"},{"key":"7102_CR48","doi-asserted-by":"publisher","unstructured":"Carreira J and Zisserman A (2017) Quo Vadis, action recognition? A new model and the kinetics dataset. In: Proc. - 30th IEEE Conf Comput Vis Pattern Recognition, CVPR 2017, 2017: 4724\u20134733. https:\/\/doi.org\/10.1109\/CVPR.2017.502","DOI":"10.1109\/CVPR.2017.502"},{"key":"7102_CR49","unstructured":"Ioffe S and Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. In: 32nd Int Conf Mach Learn. ICML 2015, 1:448\u2013456"},{"key":"7102_CR50","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00560","author":"P Nguyen","year":"2019","unstructured":"Nguyen P, Ramanan D, Fowlkes C (2019) Weakly-supervised action localization with background modeling. Proc IEEE Int Conf Comput Vis. https:\/\/doi.org\/10.1109\/ICCV.2019.00560","journal-title":"Proc IEEE Int Conf Comput Vis"},{"key":"7102_CR51","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-19823-7_15","author":"Z Kang","year":"2019","unstructured":"Kang Z, Wang L, Liu Z, Zhang Q, Zheng N (2019) Extracting action sensitive features to facilitate weakly-supervised action localization. IFIP Adv Inform Commun Technol. https:\/\/doi.org\/10.1007\/978-3-030-19823-7_15","journal-title":"IFIP Adv Inform Commun Technol"},{"key":"7102_CR52","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8803447","author":"Y Zhai","year":"2019","unstructured":"Zhai Y, Wang L, Liu Z, Zhang Q, Hua G, Zheng N (2019) Action coherence network for weakly supervised temporal action localization. Proc - Int Conf Image Process. https:\/\/doi.org\/10.1109\/ICIP.2019.8803447","journal-title":"Proc - Int Conf Image Process"},{"key":"7102_CR53","doi-asserted-by":"publisher","unstructured":"Zhang C et al (2019) Adversarial seeded sequence growing for weakly-supervised temporal action localization. In: MM 2019 - Proc 27th ACM Int Conf Multimed, pp. 738\u2013746. https:\/\/doi.org\/10.1145\/3343031.3351044","DOI":"10.1145\/3343031.3351044"},{"key":"7102_CR54","unstructured":"Yuan Y, Lyu Y, Shen X, Tsang IW, and Yeung DY (2019) Marginalized average attentional network for weakly-supervised learning. In: 7th Int Conf Learn. Represent. ICLR 2019, pp. 1\u201319"},{"key":"7102_CR55","doi-asserted-by":"crossref","unstructured":"Min K and Corso JJ (2020) Adversarial background-aware loss for weakly-supervised temporal activity localization. ECCV 2020, [Online]. Available: http:\/\/arxiv.org\/abs\/2007.06643","DOI":"10.1007\/978-3-030-58568-6_17"},{"key":"7102_CR56","unstructured":"Nair V and Hinton GE (2010) Rectified linear units improve restricted Boltzmann machines. In: ICML 2010 - Proceedings, 27th Int Conf Mach Learn, pp. 807\u2013814"},{"key":"7102_CR57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319","author":"B Zhou","year":"2016","unstructured":"Zhou B, Khosla A, Lapedriza A, Oliva A, Torralba A (2016) Learning deep features for discriminative localization. Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit. https:\/\/doi.org\/10.1109\/CVPR.2016.319","journal-title":"Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit"},{"key":"7102_CR58","doi-asserted-by":"crossref","unstructured":"Narayan S, Cholakkal H, Hayat M, Khan FS, Yang MH, and Shao L (2020) D2-Net: weakly-supervised action localization via discriminative embeddings and denoised activations. arXiv, no. December","DOI":"10.1109\/ICCV48922.2021.01335"},{"key":"7102_CR59","doi-asserted-by":"publisher","unstructured":"Islam A and Radke RJ (2020) Weakly supervised temporal action localization using deep metric learning. In: Proceedings - 2020 IEEE winter conference on applications of computer vision, WACV 2020, pp. 536\u2013545. https:\/\/doi.org\/10.1109\/WACV45572.2020.9093620","DOI":"10.1109\/WACV45572.2020.9093620"},{"key":"7102_CR60","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cviu.2016.10.018","volume":"155","author":"H Idrees","year":"2017","unstructured":"Idrees H et al (2017) The THUMOS challenge on action recognition for videos \u2018in the wild.\u2019 Comput Vis Image Underst 155:1\u201323. https:\/\/doi.org\/10.1016\/j.cviu.2016.10.018","journal-title":"Comput Vis Image Underst"},{"key":"7102_CR61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698","author":"FC Heilbron","year":"2015","unstructured":"Heilbron FC, Escorcia V, Ghanem B, Niebles JC (2015) ActivityNet: a large-scale video benchmark for human activity understanding. Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit. https:\/\/doi.org\/10.1109\/CVPR.2015.7298698","journal-title":"Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit"},{"key":"7102_CR62","doi-asserted-by":"publisher","unstructured":"Sigurdsson GA, Varol G, Wang X, Farhadi A, Laptev I, and Gupta A (2016) Hollywood in homes: crowdsourcing data collection for activity understanding. Lect Notes Comput Sci (including Subser. Lect Notes Artif Intell Lect Notes Bioinformatics), vol. 9905 LNCS, pp. 510\u2013526. https:\/\/doi.org\/10.1007\/978-3-319-46448-0_31","DOI":"10.1007\/978-3-319-46448-0_31"},{"key":"7102_CR63","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00876","author":"H Zhao","year":"2019","unstructured":"Zhao H, Torralba A, Torresani L, Yan Z (2019) HACS: Human action clips and segments dataset for recognition and temporal localization. Proc IEEE Int Conf Comput Vis. https:\/\/doi.org\/10.1109\/ICCV.2019.00876","journal-title":"Proc IEEE Int Conf Comput Vis"},{"key":"7102_CR64","doi-asserted-by":"publisher","unstructured":"Huang Z, Wang X, Wang JJ, Liu W, and Wang JJ (2018) Weakly-supervised semantic segmentation network with deep seeded region growing. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition, pp. 7014\u20137023. https:\/\/doi.org\/10.1109\/CVPR.2018.00733","DOI":"10.1109\/CVPR.2018.00733"},{"key":"7102_CR65","doi-asserted-by":"crossref","unstructured":"Islam A, Long C, and Radke RJ (2021) A hybrid attention mechanism for weakly-supervised temporal action localization, no. Mil, [Online]. Available: http:\/\/arxiv.org\/abs\/2101.00545","DOI":"10.1109\/WACV45572.2020.9093620"},{"key":"7102_CR66","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107686","volume":"110","author":"Y Ge","year":"2021","unstructured":"Ge Y, Qin X, Yang D, Jagersand M (2021) Deep snippet selective network for weakly supervised temporal action localization. Pattern Recognit 110:107686. https:\/\/doi.org\/10.1016\/j.patcog.2020.107686","journal-title":"Pattern Recognit"},{"key":"7102_CR67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00562","author":"T Yu","year":"2019","unstructured":"Yu T, Ren Z, Li Y, Yan E, Xu N, Yuan J (2019) Temporal structure mining for weakly supervised action detection. Proc IEEE Int Conf Comput Vis. https:\/\/doi.org\/10.1109\/ICCV.2019.00562","journal-title":"Proc IEEE Int Conf Comput Vis"},{"key":"7102_CR68","unstructured":"Hendrycks D and Gimpel K (2016) A baseline for detecting misclassified and out-of-distribution examples in neural networks. 5th Int Conf Learn Represent ICLR 2017 \u2013 Conf Track Proc, pp. 1\u201312. [Online]. Available: http:\/\/arxiv.org\/abs\/1610.02136"},{"key":"7102_CR69","doi-asserted-by":"publisher","unstructured":"Hou R, Sukthankar R, and Shah M (2017) Real-time temporal action localization in untrimmed videos by sub-action discovery. Br Mach Vis Conf, BMVC . https:\/\/doi.org\/10.5244\/c.31.91","DOI":"10.5244\/c.31.91"},{"key":"7102_CR70","doi-asserted-by":"publisher","unstructured":"Heidarivincheh F, Mirmehdi M, and Damen D (2019) Weakly-supervised completion moment detection using temporal attention. Proc. - 2019 Int Conf Comput Vis Work. ICCVW 2019, pp. 1188\u20131196. https:\/\/doi.org\/10.1109\/ICCVW.2019.00150","DOI":"10.1109\/ICCVW.2019.00150"},{"key":"7102_CR71","doi-asserted-by":"publisher","unstructured":"Luo Z et al (2020) Weakly-supervised action localization with expectation-maximization multi-instance learning. Lect. Notes Comput Sci (including Subser Lect Notes Artif Intell Lect Notes Bioinformatics), 12374 LNCS, no. Mil, pp. 729\u2013745. https:\/\/doi.org\/10.1007\/978-3-030-58526-6_43","DOI":"10.1007\/978-3-030-58526-6_43"},{"key":"7102_CR72","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2019.2962815","author":"XY Zhang","year":"2020","unstructured":"Zhang XY, Li C, Shi H, Zhu X, Li P, Dong J (2020) AdapNet: adaptability decomposing encoder-decoder network for weakly supervised action recognition and localization. IEEE Trans Neural Netw Learn Syst. https:\/\/doi.org\/10.1109\/TNNLS.2019.2962815","journal-title":"IEEE Trans Neural Netw Learn Syst"},{"key":"7102_CR73","first-page":"4078","volume":"2017","author":"J Snell","year":"2017","unstructured":"Snell J, Swersky K, Zemel R (2017) Prototypical networks for few-shot learning. Adv Neural Inform Process Syst 2017:4078\u20134088","journal-title":"Adv Neural Inform Process Syst"},{"key":"7102_CR74","unstructured":"Kingma DP and J. L. Ba (2015) Adam: a method for stochastic optimization, 3rd Int Conf Learn Represent. ICLR 2015 - Conf Track Proc, pp. 1\u201315"},{"issue":"1","key":"7102_CR75","doi-asserted-by":"publisher","first-page":"74","DOI":"10.1007\/s11263-019-01211-2","volume":"128","author":"Y Zhao","year":"2020","unstructured":"Zhao Y, Xiong Y, Wang L, Wu Z, Tang X, Lin D (2020) Temporal action detection with structured segment networks. Int J Comput Vis 128(1):74\u201395. https:\/\/doi.org\/10.1007\/s11263-019-01211-2","journal-title":"Int J Comput Vis"},{"key":"7102_CR76","unstructured":"Defferrard M, Bresson X, and Vandergheynst P (2016) Convolutional neural networks on graphs with fast localized spectral filtering. Adv Neural Inform Process Syst, no. Nips, pp. 3844\u20133852"},{"issue":"4","key":"7102_CR77","doi-asserted-by":"publisher","first-page":"1770","DOI":"10.1109\/TIP.2017.2651400","volume":"26","author":"J Pang","year":"2017","unstructured":"Pang J, Cheung G (2017) Graph laplacian regularization for image denoising: analysis in the continuous domain. IEEE Trans Image Process 26(4):1770\u20131785. https:\/\/doi.org\/10.1109\/TIP.2017.2651400","journal-title":"IEEE Trans Image Process"},{"key":"7102_CR78","doi-asserted-by":"crossref","unstructured":"Zhai Y, Wang L, Tang W, Zhang Q, and Yuan J (2020) Two-stream consensus network for weakly-supervised temporal action localization. In: Proc Eur. Conf Comput Vis, no. Mil, pp. 1\u201317","DOI":"10.1007\/978-3-030-58539-6_3"},{"key":"7102_CR79","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00984","author":"G Gong","year":"2020","unstructured":"Gong G, Wang X, Mu Y, Tian Q (2020) Learning temporal co-attention models for unsupervised video action localization. Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit. https:\/\/doi.org\/10.1109\/CVPR42600.2020.00984","journal-title":"Proc IEEE Comput Soc Conf Comput Vis Pattern Recognit"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07102-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-022-07102-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-022-07102-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,14]],"date-time":"2022-05-14T05:49:09Z","timestamp":1652507349000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-022-07102-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,3,7]]},"references-count":79,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["7102"],"URL":"https:\/\/doi.org\/10.1007\/s00521-022-07102-x","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"type":"print","value":"0941-0643"},{"type":"electronic","value":"1433-3058"}],"subject":[],"published":{"date-parts":[[2022,3,7]]},"assertion":[{"value":"26 March 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 March 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}