{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,11]],"date-time":"2025-11-11T13:50:27Z","timestamp":1762869027733,"version":"3.37.3"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"19","license":[{"start":{"date-parts":[[2023,3,15]],"date-time":"2023-03-15T00:00:00Z","timestamp":1678838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,3,15]],"date-time":"2023-03-15T00:00:00Z","timestamp":1678838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61672268"],"award-info":[{"award-number":["61672268"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s11042-023-14670-0","type":"journal-article","created":{"date-parts":[[2023,3,15]],"date-time":"2023-03-15T10:03:22Z","timestamp":1678874602000},"page":"29769-29787","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Deep cascaded action attention network for weakly-supervised temporal action localization"],"prefix":"10.1007","volume":"82","author":[{"given":"Hui-fen","family":"Xia","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7475-2895","authenticated-orcid":false,"given":"Yong-zhao","family":"Zhan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2023,3,15]]},"reference":[{"key":"14670_CR1","doi-asserted-by":"crossref","unstructured":"Caba Heilbron F, Escorcia V, Ghanem B et al (2015) ActivityNet: A large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 961\u2013970","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"14670_CR2","doi-asserted-by":"crossref","unstructured":"Carreira J, Zisserman A, Quo vadis (2017) Action recognition? a new model and the kinetics dataset. In: proceedings of the IEEE conference on computer vision and pattern recognition, pp 6299\u20136308","DOI":"10.1109\/CVPR.2017.502"},{"key":"14670_CR3","doi-asserted-by":"crossref","unstructured":"Chao Y W, Vijayanarasimhan S, Seybold B et al (2018) Rethinking the faster R-CNN architecture for temporal action localization. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1130\u20131139","DOI":"10.1109\/CVPR.2018.00124"},{"key":"14670_CR4","doi-asserted-by":"publisher","first-page":"107686","DOI":"10.1016\/j.patcog.2020.107686","volume":"110","author":"Y Ge","year":"2021","unstructured":"Ge Y, Qin X, Yang D et al (2021) Deep snippet selective network for weakly supervised temporal action localization. Pattern Recogn 110:107686","journal-title":"Pattern Recogn"},{"issue":"07","key":"14670_CR5","first-page":"11053","volume":"34","author":"L Huang","year":"2020","unstructured":"Huang L, Huang Y, Ouyang W et al (2020) Relational prototypical network for weakly supervised temporal action localization. Proc AAAI Conf Artif Intell 34(07):11053\u201311060","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"14670_CR6","unstructured":"Jiang Y G, Liu J, Roshan Zamir A et al (2014) THUMOS challenge: Action recognition with a large number of classes. Sept 3 online. Available: http:\/\/crcv.ucf.edu\/THUMOS14"},{"key":"14670_CR7","unstructured":"Kay W, Carreira J, Simonyan K et al (2017) The kinetics human action video dataset. arXiv:1705.06950"},{"key":"14670_CR8","unstructured":"Kingma DP, Ba J (2014) Adam: A method for stochastic optimization. arXiv:1412.6980"},{"issue":"07","key":"14670_CR9","first-page":"11320","volume":"34","author":"P Lee","year":"2020","unstructured":"Lee P, Uh Y, Byun H (2020) Background suppression network for weakly-supervised temporal action localization. Proc AAAI Conf Artif Intell 34(07):11320\u201311327","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"14670_CR10","doi-asserted-by":"crossref","unstructured":"Lin T, Zhao X, Shou Z (2017) Single shot temporal action detection. In: Proceedings of the 25th ACM international conference on multimedia, pp 988\u2013996","DOI":"10.1145\/3123266.3123343"},{"key":"14670_CR11","doi-asserted-by":"crossref","unstructured":"Liu D, Jiang T, Wang Y (2019) Completeness modeling and context separation for weakly supervised temporal action localization. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 1298\u20131307","DOI":"10.1109\/CVPR.2019.00139"},{"key":"14670_CR12","doi-asserted-by":"publisher","unstructured":"Liu Z, Wang L, Zhang Q et al (2021) Weakly supervised temporal action localization through contrast based evaluation networks. IEEE Trans Pattern Anal Intell (Early Access). https:\/\/doi.org\/10.1109\/TPAMI:3078798","DOI":"10.1109\/TPAMI:3078798"},{"key":"14670_CR13","doi-asserted-by":"crossref","unstructured":"Long F, Yao T, Qiu Z et al (2019) Gaussian temporal awareness networks for action localization. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 344\u2013353","DOI":"10.1109\/CVPR.2019.00043"},{"key":"14670_CR14","doi-asserted-by":"crossref","unstructured":"Narayan S, Cholakkal H, Khan FS et al (2019) 3C-Net: Category count and center loss for weakly-supervised action localization. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 8679\u20138687","DOI":"10.1109\/ICCV.2019.00877"},{"key":"14670_CR15","unstructured":"Nguyen P, Liu T, Prasad G, et al (2008) Weakly supervised action localization by sparse temporal pooling network. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6752\u20136761"},{"key":"14670_CR16","doi-asserted-by":"crossref","unstructured":"Nguyen P X, Ramanan D, Fowlkes CC (2019) Weakly-supervised action localization with background modeling. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 5502\u20135511","DOI":"10.1109\/ICCV.2019.00560"},{"key":"14670_CR17","doi-asserted-by":"crossref","unstructured":"Paul S, Roy S, Roy-Chowdhury A K (2018) W-TALC: Weakly-supervised temporal activity localization and classification. In: Proceedings of the European conference on computer vision (ECCV), pp 563\u2013579","DOI":"10.1007\/978-3-030-01225-0_35"},{"key":"14670_CR18","doi-asserted-by":"publisher","first-page":"1520","DOI":"10.1109\/LSP.2020.3018914","volume":"27","author":"X Qin","year":"2020","unstructured":"Qin X, Ge Y, Yu H et al (2020) Spatial enhancement and temporal constraint for weakly supervised action localization. IEEE Sig Process Lett 27:1520\u20131524","journal-title":"IEEE Sig Process Lett"},{"issue":"4","key":"14670_CR19","doi-asserted-by":"publisher","first-page":"485","DOI":"10.1007\/s10462-016-9473-y","volume":"46","author":"M Ramezani","year":"2016","unstructured":"Ramezani M, Yaghmaee F (2016) A review on human action analysis in videos for retrieval applications. Artif Intell Rev 46(4):485\u2013514","journal-title":"Artif Intell Rev"},{"key":"14670_CR20","doi-asserted-by":"crossref","unstructured":"Rashid M, Kjellstrom H, Lee YJ (2020) Action graphs: Weakly-supervised action localization with graph convolution networks. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp 615\u2013624","DOI":"10.1109\/WACV45572.2020.9093404"},{"key":"14670_CR21","doi-asserted-by":"crossref","unstructured":"Schindler K, Van Gool L (2008) Action snippets: How many frames does human action recognition require?. In: 2008 IEEE conference on computer vision and pattern recognition. IEEE, pp 1\u20138","DOI":"10.1109\/CVPR.2008.4587730"},{"key":"14670_CR22","doi-asserted-by":"crossref","unstructured":"Shou Z, Gao H, Zhang L, et al (2018) Autoloc: Weakly-supervised temporal action localization in untrimmed videos. In: Proceedings of the European conference on computer vision (ECCV), pp 154\u2013171","DOI":"10.1007\/978-3-030-01270-0_10"},{"key":"14670_CR23","doi-asserted-by":"crossref","unstructured":"Shou Z, Wang D, Chang S F (2016) Temporal action localization in untrimmed videos via multi-stage CNNS. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1049\u20131058","DOI":"10.1109\/CVPR.2016.119"},{"key":"14670_CR24","unstructured":"Simonyan K, Zisserman A (2014) Two-stream convolutional networks for action recognition in videos. arXiv:1406.2199"},{"key":"14670_CR25","doi-asserted-by":"crossref","unstructured":"Sultani W, Chen C, Shah M (2018) Real-world anomaly detection in surveillance videos. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 6479\u20136488","DOI":"10.1109\/CVPR.2018.00678"},{"key":"14670_CR26","doi-asserted-by":"crossref","unstructured":"Tran D, Bourdev L, Fergus R et al (2015) Learning spatiotemporal features with 3D convolutional networks. In: Proceedings of the IEEE international conference on computer vision, pp 4489\u20134497","DOI":"10.1109\/ICCV.2015.510"},{"key":"14670_CR27","doi-asserted-by":"crossref","unstructured":"Wang H, Schmid C (2013) Action recognition with improved trajectories. In: Proceedings of the IEEE international conference on computer vision, pp 3551\u20133558","DOI":"10.1109\/ICCV.2013.441"},{"key":"14670_CR28","doi-asserted-by":"crossref","unstructured":"Wang L, Xiong Y, Lin D, et al (2017) Untrimmednets for weakly supervised action recognition and detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 4325\u20134334","DOI":"10.1109\/CVPR.2017.678"},{"key":"14670_CR29","doi-asserted-by":"crossref","unstructured":"Wang L, Xiong Y, Wang Z et al (2016) Temporal segment networks: Towards good practices for deep action recognition. In: European conference on computer vision. Springer, Cham, pp 20\u201336","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"14670_CR30","doi-asserted-by":"crossref","unstructured":"Wedel A, Pock T, Zach C et al (2009) An improved algorithm for TV-L 1 optical flow. In: Statistical and geometrical approaches to visual motion analysis. Springer, Berlin, pp 23\u201345","DOI":"10.1007\/978-3-642-03061-1_2"},{"key":"14670_CR31","doi-asserted-by":"crossref","unstructured":"Xu M, Zhao C, Rojas D S et al (2020) G-TAD: Sub-graph localization for temporal action detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10156\u201310165","DOI":"10.1109\/CVPR42600.2020.01017"},{"key":"14670_CR32","doi-asserted-by":"publisher","first-page":"103276","DOI":"10.1016\/j.jvcir.2021.103276","volume":"80","author":"J Yu","year":"2021","unstructured":"Yu J, Ge Y, Qin X et al (2021) Deep feature enhancing and selecting network for weakly supervised temporal action localization. J Vis Commun Image Represent 80:103276","journal-title":"J Vis Commun Image Represent"},{"issue":"12","key":"14670_CR33","doi-asserted-by":"publisher","first-page":"5797","DOI":"10.1109\/TIP.2019.2922108","volume":"28","author":"R Zeng","year":"2019","unstructured":"Zeng R, Gan C, Chen P et al (2019) Breaking winner-takes-all: Iterative-winners-out networks for weakly supervised temporal action localization. IEEE Trans Image Process 28(12):5797\u20135808","journal-title":"IEEE Trans Image Process"},{"key":"14670_CR34","doi-asserted-by":"crossref","unstructured":"Zeng R, Huang W, Tan M, et al (2019) Graph convolutional networks for temporal action localization. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 7094\u20137103","DOI":"10.1109\/ICCV.2019.00719"},{"key":"14670_CR35","doi-asserted-by":"crossref","unstructured":"Zhao P, Xie L, Ju C et al (2020) Bottom-up temporal action localization with mutual regularization. In: European conference on computer vision. Springer, Cham, pp 539\u2013555","DOI":"10.1007\/978-3-030-58598-3_32"},{"key":"14670_CR36","doi-asserted-by":"crossref","unstructured":"Zhao Y, Xiong Y, Wang L et al (2017) Temporal action detection with structured segment networks. In: Proceedings of the IEEE international conference on computer vision, pp 2914\u20132923","DOI":"10.1109\/ICCV.2017.317"},{"key":"14670_CR37","unstructured":"Zhong JX, Li N, Kong W et al (2008) Step-by-step erasion, one-by-one collection: a weakly supervised temporal action detector. In: Proceedings of the 26th ACM international conference on multimedia, pp 35\u201344"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-14670-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-14670-0\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-14670-0.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,22]],"date-time":"2023-07-22T10:36:56Z","timestamp":1690022216000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-14670-0"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,3,15]]},"references-count":37,"journal-issue":{"issue":"19","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["14670"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-14670-0","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"type":"print","value":"1380-7501"},{"type":"electronic","value":"1573-7721"}],"subject":[],"published":{"date-parts":[[2023,3,15]]},"assertion":[{"value":"15 November 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 June 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"3 February 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 March 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"<!--Emphasis Type='Bold' removed-->Conflict of Interests"}}]}}