{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T23:15:18Z","timestamp":1762384518505,"version":"build-2065373602"},"reference-count":43,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,1]],"date-time":"2025-05-01T00:00:00Z","timestamp":1746057600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100010418","name":"MSIT (Ministry of Science and ICT), Korea, under the ITRC (Information Technology Research Center) support program","doi-asserted-by":"publisher","award":["IITP-2020-0-01847"],"award-info":[{"award-number":["IITP-2020-0-01847"]}],"id":[{"id":"10.13039\/501100010418","id-type":"DOI","asserted-by":"publisher"}]},{"name":"IITP"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput."],"published-print":{"date-parts":[[2025,5]]},"DOI":"10.1109\/tc.2025.3540638","type":"journal-article","created":{"date-parts":[[2025,2,11]],"date-time":"2025-02-11T13:23:13Z","timestamp":1739280193000},"page":"1620-1633","source":"Crossref","is-referenced-by-count":1,"title":["AToM: Adaptive Token Merging for Efficient Acceleration of Vision Transformer"],"prefix":"10.1109","volume":"74","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5943-1599","authenticated-orcid":false,"given":"Jaekang","family":"Shin","sequence":"first","affiliation":[{"name":"School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3557-8526","authenticated-orcid":false,"given":"Myeonggu","family":"Kang","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0432-9324","authenticated-orcid":false,"given":"Yunki","family":"Han","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-7722-8702","authenticated-orcid":false,"given":"Junyoung","family":"Park","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9585-4591","authenticated-orcid":false,"given":"Lee-Sup","family":"Kim","sequence":"additional","affiliation":[{"name":"School of Electrical Engineering, Korea Advanced Institute of Science and Technology (KAIST), Daejeon, South Korea"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Attention is all you need","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"30","author":"Vaswani","year":"2017"},{"article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","year":"2019","author":"Devlin","key":"ref2"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/W18-5446"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/319"},{"key":"ref5","article-title":"Stand-alone self-attention in vision models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Ramachandran","year":"2019"},{"key":"ref6","first-page":"7354","article-title":"Self-attention generative adversarial networks","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Zhang","year":"2019"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2010.11929"},{"key":"ref8","first-page":"10347","article-title":"Training data-efficient image transformers & distillation through attention","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Touvron","year":"2021"},{"key":"ref9","first-page":"15908","article-title":"Transformer in transformer","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Han","year":"2021"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01204"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00061"},{"article-title":"Longformer: The long-document transformer","year":"2020","author":"Beltagy","key":"ref14"},{"article-title":"Generating long sequences with sparse transformers","year":"2019","author":"Child","key":"ref15"},{"key":"ref16","first-page":"250","article-title":"MnnFast: A fast and scalable system architecture for memory-augmented neural networks","volume-title":"Proc. 46th Int. Symp. Comput. Archit.","author":"Jang","year":"2019"},{"key":"ref17","first-page":"328","article-title":"A3: Accelerating attention mechanisms in neural networks with approximation","volume-title":"Proc. IEEE Int. Symp. High Perform. Comput. Archit. (HPCA)","author":"Ham","year":"2020"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00018"},{"key":"ref19","first-page":"3690","article-title":"Power-BERT: Accelerating BERT inference via progressive word-vector elimination","volume-title":"Proc. Int. Conf. Mach. 
Learn.","author":"Goyal","year":"2020"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20083-0_37"},{"article-title":"CP-ViT: Cascade vision transformer pruning via progressive sparsity prediction","year":"2022","author":"Song","key":"ref21"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/hpca56546.2023.10071047"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3466752.3480125"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10071027"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1145\/3503222.3507738"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA56546.2023.10070997"},{"article-title":"Token merging: Your ViT but faster","year":"2022","author":"Bolya","key":"ref28"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.195"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.findings-emnlp.372"},{"article-title":"DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter","year":"2019","author":"Sanh","key":"ref32"},{"key":"ref33","article-title":"MobileViT: Light-weight, general-purpose, and mobile-friendly vision transformer","volume-title":"Proc. Int. Conf. Learn. Represent.","author":"Mehta","year":"2021"},{"key":"ref34","first-page":"19974","article-title":"Chasing sparsity in vision transformers: An end-to-end exploration","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Chen","year":"2021"},{"key":"ref35","first-page":"24898","article-title":"IA-RED${}^{2}$\u00a02: Interpretability-aware redundancy reduction for vision transformers","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"34","author":"Pan","year":"2021"},{"key":"ref36","first-page":"13937","article-title":"DynamicViT: Efficient vision transformers with dynamic token sparsification","volume-title":"Proc. Adv. Neural Inf. Process. 
Syst.","volume":"34","author":"Rao","year":"2021"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2023.3293763"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.5555\/3454287.3455008"},{"article-title":"PyTorch image models","year":"2019","author":"Wightman","key":"ref39"},{"article-title":"Huggingface\u2019s transformers: State-of-the-art natural language processing","year":"2019","author":"Wolf","key":"ref40"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/LCA.2020.2973991"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1145\/3085572"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00387"}],"container-title":["IEEE Transactions on Computers"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/12\/10957749\/10880106.pdf?arnumber=10880106","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,5]],"date-time":"2025-11-05T23:10:25Z","timestamp":1762384225000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10880106\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5]]},"references-count":43,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/tc.2025.3540638","relation":{},"ISSN":["0018-9340","1557-9956","2326-3814"],"issn-type":[{"type":"print","value":"0018-9340"},{"type":"electronic","value":"1557-9956"},{"type":"electronic","value":"2326-3814"}],"subject":[],"published":{"date-parts":[[2025,5]]}}}