{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,8]],"date-time":"2026-01-08T03:33:48Z","timestamp":1767843228777,"version":"3.49.0"},"reference-count":16,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,7,9]],"date-time":"2023-07-09T00:00:00Z","timestamp":1688860800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,7,9]],"date-time":"2023-07-09T00:00:00Z","timestamp":1688860800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,7,9]]},"DOI":"10.1109\/dac56929.2023.10247799","type":"proceedings-article","created":{"date-parts":[[2023,9,15]],"date-time":"2023-09-15T13:31:31Z","timestamp":1694784691000},"page":"1-6","source":"Crossref","is-referenced-by-count":7,"title":["TF-MVP: Novel Sparsity-Aware Transformer Accelerator with Mixed-Length Vector Pruning"],"prefix":"10.1109","author":[{"given":"Eunji","family":"Yoo","sequence":"first","affiliation":[{"name":"Pohang University of Science and Technology,Pohang,Republic of Korea"}]},{"given":"Gunho","family":"Park","sequence":"additional","affiliation":[{"name":"Pohang University of Science and Technology,Pohang,Republic of Korea"}]},{"given":"Jung Gyu","family":"Min","sequence":"additional","affiliation":[{"name":"Pohang University of Science and Technology,Pohang,Republic of Korea"}]},{"given":"Se","family":"Jung Kwon","sequence":"additional","affiliation":[{"name":"NAVER Cloud,Seongnam,Republic of Korea"}]},{"given":"Baeseong","family":"Park","sequence":"additional","affiliation":[{"name":"NAVER Cloud,Seongnam,Republic of Korea"}]},{"given":"Dongsoo","family":"Lee","sequence":"additional","affiliation":[{"name":"NAVER Cloud,Seongnam,Republic of Korea"}]},{"given":"Youngjoo","family":"Lee","sequence":"additional","affiliation":[{"name":"Pohang University of Science and Technology,Pohang,Republic of Korea"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.155"},{"key":"ref12","article-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding","author":"han","year":"2015"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1580"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358269"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3140659.3080254"},{"key":"ref2","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Proc of NeurIPS"},{"key":"ref1","first-page":"4171","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"devlin","year":"2019","journal-title":"Proc of NAACL"},{"key":"ref16","article-title":"Are sixteen heads really better than one?","volume":"32","author":"michel","year":"2019","journal-title":"Proc of NeurIPS"},{"key":"ref8","first-page":"20 378","article-title":"Movement pruning: Adaptive sparsity by fine-tuning","author":"sanh","year":"2020","journal-title":"Proc of NeurIPS"},{"key":"ref7","article-title":"A fast post-training pruning framework for transformers","author":"kwon","year":"2022","journal-title":"Proc of NeurIPS"},{"key":"ref9","first-page":"363","article-title":"Optimus: Optimized matrix multiplication structure for transformer neural network accelerator","author":"park","year":"2020","journal-title":"Proc of MLSys"},{"key":"ref4","first-page":"1877","article-title":"Language models are few-shot learners","author":"brown","year":"2020","journal-title":"Proc of NeurIPS"},{"key":"ref3","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"dosovitskiy","year":"2021","journal-title":"Proc of ICLR"},{"key":"ref6","article-title":"nuqmm: Quantized matmul for efficient inference of large-scale generative language models","author":"park","year":"2022"},{"key":"ref5","article-title":"Palm: Scaling language modeling with pathways","author":"chowdhery","year":"2022"}],"event":{"name":"2023 60th ACM\/IEEE Design Automation Conference (DAC)","location":"San Francisco, CA, USA","start":{"date-parts":[[2023,7,9]]},"end":{"date-parts":[[2023,7,13]]}},"container-title":["2023 60th ACM\/IEEE Design Automation Conference (DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10247654\/10247655\/10247799.pdf?arnumber=10247799","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,10,2]],"date-time":"2023-10-02T13:41:06Z","timestamp":1696254066000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10247799\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,9]]},"references-count":16,"URL":"https:\/\/doi.org\/10.1109\/dac56929.2023.10247799","relation":{},"subject":[],"published":{"date-parts":[[2023,7,9]]}}}