{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T00:44:57Z","timestamp":1772325897179,"version":"3.50.1"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"5-6","license":[{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T00:00:00Z","timestamp":1733788800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int. J. Mach. Learn. &amp; Cyber."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1007\/s13042-024-02476-x","type":"journal-article","created":{"date-parts":[[2024,12,10]],"date-time":"2024-12-10T21:58:49Z","timestamp":1733867929000},"page":"3711-3728","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["A transformer-based convolutional local attention (ConvLoA) method for temporal action localization"],"prefix":"10.1007","volume":"16","author":[{"given":"Sainithin","family":"Artham","sequence":"first","affiliation":[]},{"given":"Soharab Hossain","family":"Shaikh","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,10]]},"reference":[{"key":"2476_CR1","doi-asserted-by":"crossref","unstructured":"Qing Z, Su H, Gan W, Wang D, Wu W. Wang X, Qiao Y, Yan J, Gao C, Sang N (2021) Temporal context aggregation network for temporal action proposal refinement. ArXiv.\/abs\/2103.13141.","DOI":"10.1109\/CVPR46437.2021.00055"},{"key":"2476_CR2","doi-asserted-by":"crossref","unstructured":"Sridhar D, Quader N, Muralidharan S, Li Y, Dai P, Lu J (2021) Class semantics-based attention for action detection. ArXiv. \/abs\/2109.02613","DOI":"10.1109\/ICCV48922.2021.01348"},{"key":"2476_CR3","doi-asserted-by":"crossref","unstructured":"Xu M, Zhao C, Rojas DS, Thabet A, Ghanem B (2019) G-TAD: sub-graph localization for temporal action detection. ArXiv. \/abs\/1911.11462.","DOI":"10.1109\/CVPR42600.2020.01017"},{"key":"2476_CR4","doi-asserted-by":"crossref","unstructured":"Zeng R, Huang W, Tan M, Rong Y, Zhao P, Huang J, Gan C (2019) Graph convolutional networks for temporal action localization. ArXiv. \/abs\/1909.03252.","DOI":"10.1109\/ICCV.2019.00719"},{"key":"2476_CR5","doi-asserted-by":"crossref","unstructured":"Zhu Z, Tang W, Wang L, Zheng N, Hua G (2021) Enriching local and global contexts for temporal action localization. ArXiv. \/abs\/2107.12960","DOI":"10.1109\/ICCV48922.2021.01326"},{"key":"2476_CR6","doi-asserted-by":"crossref","unstructured":"Lin C, Xu C, Luo D, Wang Y, Tai Y, Wang C, Li J, Huang F, Fu Y Learning salient boundary feature for anchor-free temporal action localization. 2021 IEEE\/CVF CVPR (2021): 3319\u20133328.","DOI":"10.1109\/CVPR46437.2021.00333"},{"key":"2476_CR7","doi-asserted-by":"publisher","first-page":"8535","DOI":"10.1109\/TIP.2020.3016486","volume":"29","author":"Le Yang","year":"2020","unstructured":"Yang Le, Peng H, Zhang D, Jianlong Fu, Han J (2020) Revisiting anchor mechanisms for temporal action localization. 
IEEE Trans Image Process 29:8535\u20138548","journal-title":"IEEE Trans Image Process"},{"key":"2476_CR8","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2023.103692","volume":"232","author":"M Yang","year":"2022","unstructured":"Yang M, Chen G, Zheng Y-D, Tong Lu, Wang L (2022) BasicTAD: an astounding RGB-only baseline for temporal action detection. Comput Vis Image Underst 232:103692","journal-title":"Comput Vis Image Underst"},{"key":"2476_CR9","doi-asserted-by":"crossref","unstructured":"Chen G, Zheng Y-D, Wang L, Lu T(2021) DCAN: Improving Temporal Action Detection via Dual Context Aggregation. AAAI Conference on Artificial Intelligence.","DOI":"10.1609\/aaai.v36i1.19900"},{"key":"2476_CR10","doi-asserted-by":"crossref","unstructured":"Escorcia V, Caba Heilbron F, Carlos Niebles J, Ghanem B (2016) Daps: Deep action proposals for action understanding. In Eur. Conf. Comput. Vis.","DOI":"10.1007\/978-3-319-46487-9_47"},{"key":"2476_CR11","doi-asserted-by":"crossref","unstructured":"Lin C, Li J, Wang Y, Tai Y, Luo D, Cui Z, Wang C, Li J, Huang F, Ji R Fast learning of temporal action pro-posal via dense boundary generator. In: AAAI Conference on Artificial Intelligence, 2020.","DOI":"10.1609\/aaai.v34i07.6815"},{"key":"2476_CR12","doi-asserted-by":"crossref","unstructured":"Lin T, Liu X, Li X, Ding E, Wen S (2019) BMN: boundary-matching network for temporal action proposal generation. ArXiv.\/abs\/1907.09702.","DOI":"10.1109\/ICCV.2019.00399"},{"key":"2476_CR13","doi-asserted-by":"crossref","unstructured":"Lin T, Zhao X, Su H, Wang C, Yang M (2018) BSN: boundary sensitive network for temporal action proposal generation. ArXiv. \/abs\/1806.02964.","DOI":"10.1007\/978-3-030-01225-0_1"},{"key":"2476_CR14","doi-asserted-by":"crossref","unstructured":"Lin K, Li L, Lin C, Ahmed F, Gan Z, Liu Z, Lu Y, Wang L (2021) SwinBERT: End-to-End transformers with sparse attention for video captioning. ArXiv. \/abs\/2111.13196.","DOI":"10.1109\/CVPR52688.2022.01742"},{"key":"2476_CR15","doi-asserted-by":"crossref","unstructured":"Liu X, Hu Y, Bai S, Ding F, Bai X, Torr PH (2020) Multi-shot temporal event localization: a benchmark. ArXiv. \/abs\/2012.09434.","DOI":"10.1109\/CVPR46437.2021.01241"},{"key":"2476_CR16","doi-asserted-by":"publisher","unstructured":"Zhang Y, Deng L, Zhu H, Wang W, Ren Z, Zhou Q, Lu S, Sun S, Zhu Z, Manuel Gorriz J, Wang S, Deep learning in food category recognition, Information Fusion, 98, 2023, 101859, ISSN 1566\u20132535, https:\/\/doi.org\/10.1016\/j.inffus.2023.101859.","DOI":"10.1016\/j.inffus.2023.101859"},{"key":"2476_CR17","unstructured":"Yang M, Gao H, Guo P, Wang L (2023) Adapting short-term transformers for action detection in untrimmed videos. ArXiv.\/abs\/2312.01897."},{"key":"2476_CR18","first-page":"6","volume":"2","author":"Le Yang","year":"2020","unstructured":"Yang Le, Peng H, Zhang D, Jianlong Fu, Han J (2020) Revisiting anchor mechanisms for temporal action localization. IEEE Trans Image Process 2:6","journal-title":"IEEE Trans Image Process"},{"key":"2476_CR19","doi-asserted-by":"crossref","unstructured":"Yang M, Chen G, Zheng Y-D, Lu T, Wang L Basictad: an astounding rgb-only baseline for temporal action detection. 
arXiv preprint arXiv:2205.02717, 2022.","DOI":"10.1016\/j.cviu.2023.103692"},{"key":"2476_CR20","doi-asserted-by":"publisher","first-page":"459","DOI":"10.1109\/OJEMB.2023.3305190","volume":"5","author":"Z Ren","year":"2024","unstructured":"Ren Z, Kong X, Zhang Y, Wang S (2024) UKSSL: underlying knowledge based semi-supervised learning for medical image classification. IEEE Open J Eng Med Biol 5:459\u2013466. https:\/\/doi.org\/10.1109\/OJEMB.2023.3305190","journal-title":"IEEE Open J Eng Med Biol"},{"key":"2476_CR21","doi-asserted-by":"crossref","unstructured":"Liu X, Bai S, Bai X An empirical study of end-to-end temporal ac-tion detection. In Proceedings of the IEEE\/CVF CVPR, pages 20010\u201320019, 2022","DOI":"10.1109\/CVPR52688.2022.01938"},{"key":"2476_CR22","doi-asserted-by":"publisher","first-page":"5427","DOI":"10.1109\/TIP.2022.3195321","volume":"31","author":"X Liu","year":"2022","unstructured":"Liu X et al (2022) End-to-end temporal action detection with transformer. IEEE Trans Image Process 31:5427\u20135441. https:\/\/doi.org\/10.1109\/TIP.2022.3195321","journal-title":"IEEE Trans Image Process"},{"key":"2476_CR23","doi-asserted-by":"crossref","unstructured":"Shi D, Zhong Y, Cao Q, Zhang J, Ma L, Li J, Tao D (2022) ReAct: temporal action detection with relational queries. ArXiv.\/abs\/2207.07097.","DOI":"10.1007\/978-3-031-20080-9_7"},{"key":"2476_CR24","doi-asserted-by":"crossref","unstructured":"Tan J, Tang J, Wang L, Wu G (2021) Relaxed transformer decoders for direct action proposal generation. ArXiv. \/abs\/2102.01894.","DOI":"10.1109\/ICCV48922.2021.01327"},{"key":"2476_CR25","doi-asserted-by":"publisher","unstructured":"Kim H-J, Lee S-W (2024) Ensuring spatial scalability with temporal-wise spatial attentive pooling for temporal action detection, Neural Networks, 176: 106321, ISSN 0893-6080, https:\/\/doi.org\/10.1016\/j.neunet.2024.106321.","DOI":"10.1016\/j.neunet.2024.106321"},{"key":"2476_CR26","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3195321","author":"X Liu","year":"2021","unstructured":"Liu X, Wang Q, Hu Y, Tang X, Zhang S, Bai S, Bai X (2021) End-to-end temporal action detection with transformer. ArXiv. https:\/\/doi.org\/10.1109\/TIP.2022.3195321","journal-title":"ArXiv"},{"key":"2476_CR27","unstructured":"Zhao, C., Thabet, A., & Ghanem, B. (2020). Video Self-Stitching Graph Network for Temporal Action Localization. ArXiv. \/abs\/2011.14598."},{"key":"2476_CR28","doi-asserted-by":"crossref","unstructured":"Liu Q, Wang Z Progressive boundary refinement network for temporal action detection. In Proceedings of the AAAI Conference on Artificial Intelligence, 2020.","DOI":"10.1609\/aaai.v34i07.6829"},{"key":"2476_CR29","doi-asserted-by":"crossref","unstructured":"Zeng Y, Zhong Y, Feng C, Ma L (2024) UniMD: Towards unifying moment retrieval and temporal action detection. ArXiv. \/abs\/2404.04933.","DOI":"10.1007\/978-3-031-72952-2_17"},{"key":"2476_CR30","doi-asserted-by":"crossref","unstructured":"Cheng F, Bertasius G (2022) TALLFormer: temporal action localization with a long-memory transformer. ArXiv. \/abs\/2204.01680.","DOI":"10.1007\/978-3-031-19830-4_29"},{"key":"2476_CR31","doi-asserted-by":"crossref","unstructured":"Zhang C, Wu J, Li Y (2022) ActionFormer: localizing moments of actions with transformers. ArXiv. \/abs\/2202.07925.","DOI":"10.1007\/978-3-031-19772-7_29"},{"key":"2476_CR32","doi-asserted-by":"crossref","unstructured":"Xiong Y, Dai B, Lin D (2018) Move forward and tell: a progressive generator of video descriptions. 
ArXiv, abs\/1807.10018.","DOI":"10.1007\/978-3-030-01252-6_29"},{"key":"2476_CR33","doi-asserted-by":"crossref","unstructured":"Wang T, Zhang R, Lu Z, Zheng F, Cheng R, Luo P (2021) End-to-End Dense Video Captioning with Parallel Decoding. ArXiv. \/abs\/2108.07781.","DOI":"10.1109\/ICCV48922.2021.00677"},{"key":"2476_CR34","doi-asserted-by":"crossref","unstructured":"Shi D, Zhong Y, Cao Q, Ma L, Li J, Tao D (2023) TriDet: Temporal action detection with relative boundary modeling. ArXiv.\/abs\/2303.07347.","DOI":"10.1109\/CVPR52729.2023.01808"},{"key":"2476_CR35","doi-asserted-by":"crossref","unstructured":"Yu-Wei Chao, Sudheendra Vijayanarasimhan, Bryan Seybold, David A Ross, Jia Deng, and Rahul Sukthankar. Rethinking the faster r-cnn architecture for temporal action localization. In: Proceedings of the IEEE CVPR, pages 1130\u20131139, 2018","DOI":"10.1109\/CVPR.2018.00124"},{"key":"2476_CR36","unstructured":"Liu Z, Ning J, Cao Y, Wei Y, Zhang Z, Lin S, Hu H Video swin transformer. arXiv preprint. ArXiv. \/abs\/2106.13230, (2021)."},{"key":"2476_CR37","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. ArXiv.\/abs\/2103.14030.","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"2476_CR38","unstructured":"Zhu W, Pang B, Thapliyal AV, Wang WY, Soricut R (2022) End-to-end dense video captioning as sequence generation. ArXiv.\/abs\/2204.08121."},{"key":"2476_CR39","doi-asserted-by":"crossref","unstructured":"Yang A, Nagrani A, Seo PH, Miech A, Laptev I, Sivic J, Schmid C (2023) Vid2Seq: Large-scale pretraining of a visual language model for dense video captioning. ArXiv.\/abs\/2302.14115.","DOI":"10.1109\/CVPR52729.2023.01032"},{"key":"2476_CR40","doi-asserted-by":"crossref","unstructured":"Zhang K, Chao W, Sha F, Grauman K (2016) Video summarization with long short-term memory. ArXiv.\/abs\/1605.08110.","DOI":"10.1007\/978-3-319-46478-7_47"},{"key":"2476_CR41","doi-asserted-by":"crossref","unstructured":"Shou Z, Wang D, Chang S (2016) Temporal Action localization in untrimmed videos via multi-stage CNNs. ArXiv.\/abs\/1601.02129.","DOI":"10.1109\/CVPR.2016.119"},{"key":"2476_CR42","doi-asserted-by":"publisher","unstructured":"Kwak NJ, Song TS (2013) Human action classification and unusual action recognition algorithm for intelligent surveillance system. In: Kim K, Chung KY (eds) IT Convergence and Security 2012. Lecture Notes in Electrical Engineering, vol 215. Springer, Dordrecht. https:\/\/doi.org\/10.1007\/978-94-007-5860-5_95.","DOI":"10.1007\/978-94-007-5860-5_95"},{"key":"2476_CR43","doi-asserted-by":"publisher","first-page":"2381","DOI":"10.1007\/s10994-022-06141-8","volume":"111","author":"S Chen","year":"2022","unstructured":"Chen S, Ke Xu, Mi Z, Jiang X, Sun T (2022) Dual-domain graph convolutional networks for skeleton-based action recognition. Mach Learn 111:2381\u20132406","journal-title":"Mach Learn"},{"key":"2476_CR44","unstructured":"Keshvarikhojasteh H, Mohammadzade H, Behroozi H (2021) Temporal action localization using gated recurrent units. ArXiv.\/abs\/2108.03375."},{"key":"2476_CR45","doi-asserted-by":"crossref","unstructured":"Yang Z, Yang D, Dyer C, He X, Smola A, Hovy E (2016) June. Hierarchical attention networks for document classification. In: Proceedings of the 2016 conference of the North American chapter of the association for computational linguistics: human language technologies\u00a0(pp. 
1480\u20131489).","DOI":"10.18653\/v1\/N16-1174"},{"key":"2476_CR46","first-page":"2579","volume":"9","author":"L Van der Maaten","year":"2008","unstructured":"Van der Maaten L, Hinton G (2008) Visualizing data using t-SNE. J Mach Learn Res 9:2579\u20132605","journal-title":"J Mach Learn Res"},{"key":"2476_CR47","doi-asserted-by":"crossref","unstructured":"Zhou L, Zhou Y, Corso JJ, Socher R, Xiong C, End-to-end dense video captioning with masked transformer. In: Proc. IEEE Conf. Comput. Vis. Pattern Recognit., 2018, pp. 8739\u20138748.","DOI":"10.1109\/CVPR.2018.00911"},{"key":"2476_CR48","doi-asserted-by":"crossref","unstructured":"Krishna R, Hata K, Ren F, Niebles JC (2017) Dense-captioning events in videos. ArXiv.\/abs\/1705.00754.","DOI":"10.1109\/ICCV.2017.83"},{"key":"2476_CR49","doi-asserted-by":"crossref","unstructured":"Zhu Y, Zhang G, Tan J, Wu G, Wang L. (2024) Dual DETRs for multi-label temporal action detection. ArXiv.\/abs\/2404.00653.","DOI":"10.1109\/CVPR52733.2024.01756"},{"key":"2476_CR50","doi-asserted-by":"crossref","unstructured":"Liu S, Zhang C, Zhao C, Ghanem B (2023) End-to-end temporal action detection with 1B parameters across 1000 frames. ArXiv.\/abs\/2311.17241.","DOI":"10.1109\/CVPR52733.2024.01759"},{"key":"2476_CR51","doi-asserted-by":"crossref","unstructured":"Kim, H., Hong, J., Kong, H., & Lee, S. (2024). TE-TAD: Towards Full End-to-End Temporal Action Detection via Time-Aligned Coordinate Expression. ArXiv. \/abs\/2404.02405.","DOI":"10.1109\/CVPR52733.2024.01782"},{"key":"2476_CR52","doi-asserted-by":"crossref","unstructured":"Zhou L, Xu C, Corso JJ (2017) Towards Automatic learning of procedures from web instructional videos. ArXiv. \/abs\/1703.09788.","DOI":"10.1609\/aaai.v32i1.12342"},{"key":"2476_CR53","doi-asserted-by":"crossref","unstructured":"Caba Heilbron F, Escorcia V, Ghanem B, Carlos Niebles J (2015) Activitynet: A large-scale video benchmark for human activity understanding. In CVPR, 961\u2013970.","DOI":"10.1109\/CVPR.2015.7298698"},{"issue":"17","key":"2476_CR54","doi-asserted-by":"publisher","first-page":"7563","DOI":"10.3390\/s23177563","volume":"23","author":"H Zhang","year":"2023","unstructured":"Zhang H, Zhou F, Ma C, Wang D, Zhang W (2023) MCMNET: multi-scale context modeling network for temporal action detection. Sensors (Basel) 23(17):7563. https:\/\/doi.org\/10.3390\/s23177563","journal-title":"Sensors (Basel)"},{"key":"2476_CR55","doi-asserted-by":"crossref","unstructured":"Kim J, Lee M, Heo J (2023) Self-feedback DETR for temporal action detection. ArXiv.\/abs\/2308.10570.","DOI":"10.1109\/ICCV51070.2023.00944"},{"key":"2476_CR56","doi-asserted-by":"crossref","unstructured":"Vahdani E, Tian Y (2023) ADM-Loc: actionness distribution modeling for point-supervised temporal action localization. ArXiv.\/abs\/2311.15916.","DOI":"10.2139\/ssrn.4943145"},{"key":"2476_CR57","doi-asserted-by":"publisher","unstructured":"Zhang H, Gao L, Zeng P, Hanjalic A, Tao Shen H (2023) Depth-aware sparse transformer for video-language learning. In: Proceedings of the 31st ACM international conference on multimedia (MM '23). association for computing machinery, New York, NY, USA, 4778\u20134787. https:\/\/doi.org\/10.1145\/3581783.3611714.","DOI":"10.1145\/3581783.3611714"},{"key":"2476_CR58","doi-asserted-by":"publisher","first-page":"2367","DOI":"10.1109\/TMM.2023.3295098","volume":"26","author":"S Jing","year":"2024","unstructured":"Jing S, Zhang H, Zeng P, Gao L, Song J, Shen HT (2024) Memory-Based augmentation network for video captioning. 
IEEE Trans Multimedia 26:2367\u20132379. https:\/\/doi.org\/10.1109\/TMM.2023.3295098","journal-title":"IEEE Trans Multimedia"},{"key":"2476_CR59","doi-asserted-by":"crossref","unstructured":"Liu D, Qu X, Dong J, Zhou P, Cheng Y, Wei W, Xu Z, Xie Y (2021) Context-aware Biaffine localizing network for temporal sentence grounding. ArXiv.\/abs\/2103.11555","DOI":"10.1109\/CVPR46437.2021.01108"},{"key":"2476_CR60","doi-asserted-by":"crossref","unstructured":"Fang X, Liu D, Zhou P, Nan G (2023) You can ground earlier than see: an effective and efficient pipeline for temporal sentence grounding in compressed videos. ArXiv.\/abs\/2303.07863.","DOI":"10.1109\/CVPR52729.2023.00242"}],"container-title":["International Journal of Machine Learning and Cybernetics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02476-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13042-024-02476-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13042-024-02476-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T04:31:45Z","timestamp":1749270705000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13042-024-02476-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,10]]},"references-count":60,"journal-issue":{"issue":"5-6","published-print":{"date-parts":[[2025,6]]}},"alternative-id":["2476"],"URL":"https:\/\/doi.org\/10.1007\/s13042-024-02476-x","relation":{},"ISSN":["1868-8071","1868-808X"],"issn-type":[{"value":"1868-8071","type":"print"},{"value":"1868-808X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12,10]]},"assertion":[{"value":"9 January 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 November 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 December 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there are no conflicts of interest regarding the publication of this research article. They have no financial or non-financial relationships that could be perceived as potentially influencing the content or findings presented in this work. This research was conducted in an unbiased manner, without any external influence or involvement from organizations, companies, or individuals that could pose a conflict of interest. The authors have adhered to the highest ethical standards throughout the research process to ensure the integrity and objectivity of the study's outcomes.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Youcook2 and ActivityNet are publicly available datasets. 
There are no restrictions on using them for independent research work.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}}]}}