{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,17]],"date-time":"2026-06-17T16:32:30Z","timestamp":1781713950204,"version":"3.54.5"},"reference-count":85,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nc-nd\/4.0\/"}],"funder":[{"name":"USyd-Fudan BISA Flagship Research Program"},{"name":"Irene Amerini","award":["PE00000014"],"award-info":[{"award-number":["PE00000014"]}]},{"name":"MUR National Recovery and Resilience Plan"},{"name":"European Union - NextGenerationEU"},{"DOI":"10.13039\/501100004271","name":"Sapienza Universit\u00e0 di Roma","doi-asserted-by":"publisher","award":["003_009_22"],"award-info":[{"award-number":["003_009_22"]}],"id":[{"id":"10.13039\/501100004271","id-type":"DOI","asserted-by":"publisher"}]},{"name":"RobFastMDE"},{"name":"The University of Sydney, Australia"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1109\/tpami.2024.3392941","type":"journal-article","created":{"date-parts":[[2024,4,24]],"date-time":"2024-04-24T17:27:32Z","timestamp":1713979652000},"page":"7682-7700","source":"Crossref","is-referenced-by-count":108,"title":["A Survey on Efficient Vision Transformers: Algorithms, Techniques, and Performance Benchmarking"],"prefix":"10.1109","volume":"46","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9393-5248","authenticated-orcid":false,"given":"Lorenzo","family":"Papa","sequence":"first","affiliation":[{"name":"Department of Computer, Control and Management Engineering, Sapienza University of Rome, Rome, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1886-3491","authenticated-orcid":false,"given":"Paolo","family":"Russo","sequence":"additional","affiliation":[{"name":"Department of Computer, Control and Management Engineering, Sapienza University of Rome, Rome, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6461-1391","authenticated-orcid":false,"given":"Irene","family":"Amerini","sequence":"additional","affiliation":[{"name":"Department of Computer, Control and Management Engineering, Sapienza University of Rome, Rome, Italy"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8762-2424","authenticated-orcid":false,"given":"Luping","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Electrical and Information Engineering, Faculty of Engineering, University of Sydney, Sydney, NSW, Australia"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref2","article-title":"RoBERTa: A robustly optimized BERT pretraining approach","author":"Liu","year":"2019"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20053-3_5"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01170"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01179"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref7","article-title":"Monarch mixer: A simple sub-quadratic GEMM-based architecture","author":"Fu","year":"2023"},{"key":"ref8","first-page":"24 261","article-title":"MLP-mixer: An all-MLP architecture for vision","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Tolstikhin"},{"key":"ref9","article-title":"Neural machine translation by jointly learning to align and translate","author":"Bahdanau","year":"2014"},{"key":"ref10","article-title":"Vision transformer adapter for dense predictions","author":"Chen","year":"2022"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00389"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2023.3260310"},{"key":"ref13","first-page":"76","article-title":"Stanford neural machine translation systems for spoken language domains","volume-title":"Proc. 12th Int. Workshop Spoken Lang. Transl. Eval. Campaign","author":"Luong"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W16-2301"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.89"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00378"},{"key":"ref18","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01838"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-25063-7_42"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2022.3152247"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3530811"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3505244"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/764"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20053-3_29"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevA.39.6600"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/72.80236"},{"key":"ref28","article-title":"Distilling the knowledge in a neural network","author":"Hinton","year":"2015"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref31","first-page":"21 297","article-title":"SOFT: Softmax-free transformer with linear complexity","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Lu"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01055"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00476"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-022-0274-8"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1163\/2214-8647_bnp_e1113370"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_40"},{"key":"ref38","volume-title":"Network Flows: Theory, Algorithms, and Applications","author":"Waissi","year":"1994"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-25082-8_3"},{"key":"ref40","first-page":"5156","article-title":"Transformers are RNNs: Fast autoregressive transformers with linear attention","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Katharopoulos"},{"key":"ref41","first-page":"14 596","article-title":"Orthogonal transformer: An efficient vision transformer backbone with token orthogonalization","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Huang"},{"key":"ref42","first-page":"10295","article-title":"EcoFormer: Energy-saving attention with linear complexity","volume":"35","author":"Liu","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref43","first-page":"1177","article-title":"Random features for large-scale kernel machines","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Rahimi"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01387"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01587"},{"key":"ref46","article-title":"Vision transformer pruning","author":"Zhu","year":"2021"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01185"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.298"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v36i3.20222"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01779"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3263826"},{"key":"ref52","first-page":"13 937","article-title":"DynamicViT: Efficient vision transformers with dynamic token sparsification","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Rao"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01054"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20083-0_37"},{"key":"ref55","article-title":"Token merging: Your ViT but faster","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Bolya"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02333"},{"key":"ref57","first-page":"10 347","article-title":"Training data-efficient image transformers & distillation through attention","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Touvron"},{"key":"ref58","first-page":"9164","article-title":"Learning efficient vision transformers via fine-grained manifold distillation","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Hao"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19803-8_5"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1016\/b978-1-55860-872-6.x5016-1"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01174"},{"key":"ref62","article-title":"On the relationship between self-attention and convolutional layers","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Cordonnier"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00874"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01627"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01214"},{"key":"ref66","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01183"},{"key":"ref67","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01882"},{"key":"ref68","first-page":"28 092","article-title":"Post-training quantization for vision transformer","volume-title":"Proc. Int. Conf. Neural Inf. Process. Syst.","author":"Liu"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19775-8_12"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.1145\/3503161.3547826"},{"key":"ref71","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2020\/322"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01946"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.1109\/FPL57034.2022.00027"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00027"},{"key":"ref75","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.02170"},{"key":"ref76","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-015-0816-y"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.544"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1109\/iccv51070.2023.01549"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.2104.10858"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.322"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"ref83","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00644"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00656"},{"key":"ref85","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01228-1_26"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10746266\/10508091.pdf?arnumber=10508091","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,27]],"date-time":"2024-11-27T00:07:56Z","timestamp":1732666076000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10508091\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":85,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2024.3392941","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12]]}}}