{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T05:10:01Z","timestamp":1749532201230,"version":"3.41.0"},"reference-count":68,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62222207","62072245","62276134","62002160","62072238"],"award-info":[{"award-number":["62222207","62072245","62276134","62002160","62072238"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004608","name":"Natural Science Foundation of Jiangsu Province","doi-asserted-by":"publisher","award":["BK20211520"],"award-info":[{"award-number":["BK20211520"]}],"id":[{"id":"10.13039\/501100004608","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Open Foundation of the Key Laboratory (Center) of Anhui Jianzhu University Anhui Province Key Laboratory of Intelligent Building and Building Energy Saving","award":["IBES2024KF02"],"award-info":[{"award-number":["IBES2024KF02"]}]},{"name":"Nanjing University Integrated Research Platform of the Ministry of Education-Top Talents Program","award":["14380125"],"award-info":[{"award-number":["14380125"]}]},{"name":"Postdoctoral Fellowship Program of China Postdoctoral Science Foundation","award":["GZB20230302"],"award-info":[{"award-number":["GZB20230302"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Circuits Syst. Video Technol."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/tcsvt.2024.3521454","type":"journal-article","created":{"date-parts":[[2024,12,23]],"date-time":"2024-12-23T19:38:25Z","timestamp":1734982705000},"page":"5407-5418","source":"Crossref","is-referenced-by-count":0,"title":["STPM: Spatial-Temporal Token Pruning and Merging for Complex Activity Recognition"],"prefix":"10.1109","volume":"35","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-3433-9771","authenticated-orcid":false,"given":"Yumeng","family":"Su","sequence":"first","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3124-9461","authenticated-orcid":false,"given":"Jiachao","family":"Zhang","sequence":"additional","affiliation":[{"name":"Artificial Intelligence Industrial Technology Research Institute, Nanjing Institute of Technology, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0694-9458","authenticated-orcid":false,"given":"Rui","family":"Yan","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Nanjing University, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7563-601X","authenticated-orcid":false,"given":"Pengpeng","family":"Li","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5487-9845","authenticated-orcid":false,"given":"Guo-Sen","family":"Xie","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4902-4663","authenticated-orcid":false,"given":"Xiangbo","family":"Shu","sequence":"additional","affiliation":[{"name":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01204"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2021.3095381"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3164083"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3282631"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3206108"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3325001"},{"key":"ref11","first-page":"12077","article-title":"SegFormer: Simple and efficient design for semantic segmentation with transformers","volume-title":"Proc. Adv. Neural Inf. Process. Sys. (NIPS)","volume":"34","author":"Xie"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00717"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3120873"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2024.3362475"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3285091"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref17","article-title":"Deformable DETR: Deformable transformers for end-to-end object detection","author":"Zhu","year":"2020","journal-title":"arXiv:2010.04159"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00042"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3261282"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3288547"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01112"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01159"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3188716"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3298645"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3201045"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3285153"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2020.2965491"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3163847"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2022.3141267"},{"key":"ref30","article-title":"Longformer: The long-document transformer","author":"Beltagy","year":"2020","journal-title":"arXiv:2004.05150"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_5"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01554"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19833-5_10"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i2.16235"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00041"},{"key":"ref37","article-title":"DeepViT: Towards deeper vision transformer","author":"Zhou","year":"2021","journal-title":"arXiv:2103.11886"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3211006"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3200245"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3206148"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3207910"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2102.05095"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3226227"},{"key":"ref46","first-page":"13937","article-title":"DynamicViT: Efficient vision transformers with dynamic token sparsification","volume-title":"Proc. Conf. Neural Inf. Process. Syst.","volume":"34","author":"Rao"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/WACV57701.2024.00016"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01521"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3287201"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3197395"},{"key":"ref51","article-title":"Token merging: Your ViT but faster","author":"Bolya","year":"2022","journal-title":"arXiv:2210.09461"},{"key":"ref52","article-title":"Accelerating transformers with spectrum-preserving token merging","author":"Tran","year":"2024","journal-title":"arXiv:2405.16148"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01490"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01082"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW63382.2024.00369"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2021.3057469"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00427"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_15"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1212.0402"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00099"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00718"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1145\/3474085.3475272"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01332"},{"key":"ref68","first-page":"10078","article-title":"VideoMAE: Masked autoencoders are data-efficient learners for self-supervised video pre-training","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","author":"Zhan"}],"container-title":["IEEE Transactions on Circuits and Systems for Video Technology"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/76\/11027896\/10812838.pdf?arnumber=10812838","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,10]],"date-time":"2025-06-10T04:55:57Z","timestamp":1749531357000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10812838\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":68,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tcsvt.2024.3521454","relation":{},"ISSN":["1051-8215","1558-2205"],"issn-type":[{"type":"print","value":"1051-8215"},{"type":"electronic","value":"1558-2205"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}