{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,6]],"date-time":"2025-10-06T18:11:48Z","timestamp":1759774308817,"version":"build-2065373602"},"reference-count":78,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62377029","92370127","22033002","62407021"],"award-info":[{"award-number":["62377029","92370127","22033002","62407021"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province, China,","doi-asserted-by":"publisher","award":["BK20240583"],"award-info":[{"award-number":["BK20240583"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Frontier Technologies Research and Development Program of Jiangsu","award":["BF2024076"],"award-info":[{"award-number":["BF2024076"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Video Technol."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tcsvt.2025.3559991","type":"journal-article","created":{"date-parts":[[2025,4,11]],"date-time":"2025-04-11T14:06:38Z","timestamp":1744380398000},"page":"10382-10395","source":"Crossref","is-referenced-by-count":0,"title":["Throughout Procedural Transformer for Online Action Detection and Anticipation"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3810-7877","authenticated-orcid":false,"given":"Haomiao","family":"Yuan","sequence":"first","affiliation":[{"name":"School of Computer and Electronic Information and the School of Artificial Intelligence, Nanjing Normal University, Nanjing, Jiangsu, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8762-4523","authenticated-orcid":false,"given":"Yi","family":"Chen","sequence":"additional","affiliation":[{"name":"School of Computer and Electronic Information and the School of Artificial Intelligence, Nanjing Normal University, Nanjing, Jiangsu, China"}]},{"given":"Zheyan","family":"Ji","sequence":"additional","affiliation":[{"name":"School of Economics and Management, Nanjing University of Aeronautics and Astronautics, Nanjing, Jiangsu, China"}]},{"given":"Zhichao","family":"Zheng","sequence":"additional","affiliation":[{"name":"School of Computer and Electronic Information and the School of Artificial Intelligence, Nanjing Normal University, Nanjing, Jiangsu, China"}]},{"given":"Yanhui","family":"Gu","sequence":"additional","affiliation":[{"name":"School of Computer and Electronic Information and the School of Artificial Intelligence, Nanjing Normal University, Nanjing, Jiangsu, China"}]},{"given":"Junsheng","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Computer and Electronic Information and the School of Artificial Intelligence, Nanjing Normal University, Nanjing, Jiangsu, China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_17"},{"key":"ref2","first-page":"836","volume-title":"Proc. 12th Eur. Conf. Comput. Vis.","volume":"7572","author":"Fitzgibbon"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.01084"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58610-2_30"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01219"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01438"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299088"},{"key":"ref8","article-title":"Empirical evaluation of gated recurrent neural networks on sequence modeling","author":"Chung","year":"2014","journal-title":"arXiv:1412.3555"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2017.2682196"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2830102"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00563"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00747"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01325"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref17","first-page":"1086","article-title":"Long short-term transformer for online action detection","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Xu"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01930"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01404"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01271"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00674"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1038\/445027a"},{"volume-title":"Proc. THUMOS Workshop","year":"2014","author":"Jiang","key":"ref23"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2022.3217368"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00876"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-021-01531-2"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00195"},{"key":"ref28","article-title":"LAP-Net: Adaptive features sampling via learning action progression for online action detection","author":"Qu","year":"2020","journal-title":"arXiv:2011.07915"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3100842"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.5244\/C.31.92"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00316"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19830-4_28"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2020.2975065"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3156058"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01322"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01240"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00635"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00287"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00306"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2019.2923712"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.119"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.317"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.286"},{"key":"ref44","article-title":"An image is worth 16\u00d716 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020","journal-title":"arXiv:2010.11929"},{"key":"ref45","first-page":"10347","article-title":"Training data-efficient image transformers & distillation through attention","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Touvron"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413535"},{"key":"ref47","article-title":"Perceiver IO: A general architecture for structured inputs & outputs","author":"Jaegle","year":"2021","journal-title":"arXiv:2107.14795"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33019159"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01817"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01424"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01033"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00949"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3077512"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3082763"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00355"},{"key":"ref58","article-title":"Activity graph transformer for temporal action localization","author":"Nawhal","year":"2021","journal-title":"arXiv:2101.08540"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2102.05095"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_29"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"volume-title":"OpenMMLab\u2019s Next Generation Video Understanding Toolbox and Benchmark","year":"2020","key":"ref63"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"ref65","article-title":"The kinetics human action video dataset","author":"Kay","year":"2017","journal-title":"arXiv:1705.06950"},{"key":"ref66","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"27","author":"Simonyan"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.2992889"},{"key":"ref69","article-title":"Adam: A method for stochastic optimization","author":"Kingma","year":"2014","journal-title":"arXiv:1412.6980"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.155"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00089"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3204808"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2022.108741"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN60899.2024.10650511"},{"key":"ref75","first-page":"47908","article-title":"Does video-text pretraining help open-vocabulary online action detection?","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"37","author":"Zhao"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.01.087"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58517-4_10"},{"key":"ref78","first-page":"10078","article-title":"VideoMAE: Masked autoencoders are data-efficient learners for self-supervised video pre-training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhan"}],"container-title":["IEEE Transactions on Circuits and Systems for Video Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/76\/11192799\/10963729.pdf?arnumber=10963729","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,6]],"date-time":"2025-10-06T17:40:00Z","timestamp":1759772400000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10963729\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":78,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tcsvt.2025.3559991","relation":{},"ISSN":["1051-8215","1558-2205"],"issn-type":[{"type":"print","value":"1051-8215"},{"type":"electronic","value":"1558-2205"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}