{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T16:35:40Z","timestamp":1773246940400,"version":"3.50.1"},"reference-count":65,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100002367","name":"Chinese Academy of Sciences","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002367","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,2]]},"DOI":"10.1109\/hpca56546.2023.10070997","type":"proceedings-article","created":{"date-parts":[[2023,3,24]],"date-time":"2023-03-24T17:42:55Z","timestamp":1679679775000},"page":"429-441","source":"Crossref","is-referenced-by-count":14,"title":["CTA: Hardware-Software Co-design for Compressed Token Attention Mechanism"],"prefix":"10.1109","author":[{"given":"Haoran","family":"Wang","sequence":"first","affiliation":[{"name":"Chinese Academy of Sciences,CICS, Institute of Computing Technology"}]},{"given":"Haobo","family":"Xu","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences,CICS, Institute of Computing Technology"}]},{"given":"Ying","family":"Wang","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences,CICS, Institute of Computing Technology"}]},{"given":"Yinhe","family":"Han","sequence":"additional","affiliation":[{"name":"Chinese Academy of Sciences,CICS, Institute of Computing Technology"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Sequence to sequence learning with neural networks","volume":"27","author":"Sutskever","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref2","article-title":"Neural machine translation by jointly learning to align and translate","author":"Bahdanau","year":"2014"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/D14-1179"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00911"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00338"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01059"},{"key":"ref7","article-title":"Stand-alone self-attention in vision models","volume":"32","author":"Ramachandran","year":"2019","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref8","first-page":"7354","article-title":"Self-attention generative adversarial networks","volume-title":"International conference on machine learning","author":"Zhang"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/319"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICDM.2018.00035"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3357384.3357925"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.11618"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v33i01.33015941"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref15","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref16","article-title":"Roberta: A robustly optimized bert pretraining approach","author":"Liu","year":"2019"},{"key":"ref17","article-title":"Sebastian Goodman, Kevin Gimpel, Piyush Sharma, and Radu Soricut. Albert: A lite bert for self-supervised learning of language representations","author":"Lan","year":"2019"},{"issue":"8","key":"ref18","first-page":"9","article-title":"Language models are unsupervised multitask learners","volume":"1","author":"Radford","year":"2019","journal-title":"OpenAI blog"},{"key":"ref19","first-page":"1877","article-title":"Language models are few-shot learners","volume":"33","author":"Brown","year":"2020","journal-title":"Advances in neural information processing systems"},{"key":"ref20","article-title":"Training multi-billion parameter language models using model parallelism","author":"Shoeybi","year":"2019"},{"issue":"2","key":"ref21","article-title":"Turing-nlg: A 17-billion-parameter language model by microsoft","volume":"1","author":"Rosset","year":"2020","journal-title":"Microsoft Blog"},{"key":"ref22","first-page":"12837","article-title":"Convbert: Improving bert with span-based dynamic convolution","volume":"33","author":"Jiang","year":"2020","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref23","first-page":"10347","article-title":"Training data-efficient image transformers & distillation through attention","volume-title":"International Conference on Machine Learning","author":"Touvron"},{"key":"ref24","article-title":"Electra: Pre-training text encoders as discriminators rather than generators","author":"Clark","year":"2020"},{"key":"ref25","article-title":"Improving language understanding by generative pre-training","author":"Radford","year":"2018"},{"key":"ref26","first-page":"1691","article-title":"Generative pretraining from pixels","volume-title":"International conference on machine learning","author":"Chen"},{"key":"ref27","article-title":"Visualbert: A simple and performant baseline for vision and language","author":"Li","year":"2019"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.58"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001177"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/JETCAS.2019.2910232"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001163"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICCAD45719.2019.8942122"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358252"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00015"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2016.32"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1145\/3007787.3001139"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2017.55"},{"key":"ref38","article-title":"An evaluation of edge tpu accelerators for convolutional neural networks","author":"Yazdanbakhsh","year":"2021"},{"key":"ref39","article-title":"Deep compression: Compressing deep neural networks with pruning, trained quantization and huffman coding","author":"Han","year":"2015"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783723"},{"key":"ref41","first-page":"382","article-title":"Bit-pragmatic deep neural network computing","volume-title":"Proceedings of the 50th Annual IEEE\/ACM International Symposium on Microarchitecture","author":"Albericio"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA47549.2020.00035"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00060"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/3470496.3527423"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1452"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D16-1264"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3097983.3098195"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.124"},{"key":"ref49","article-title":"Stacked quantizers for compositional vector compression","author":"Martinez","year":"2014"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2010.57"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995504"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2013.379"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/997817.997857"},{"key":"ref54","first-page":"518","article-title":"Similarity search in high dimensions via hashing","volume":"99","author":"Gionis","year":"1999","journal-title":"Vldb"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/69.908981"},{"key":"ref56","article-title":"Scalable techniques for clustering the web","author":"Haveliwala","year":"2000"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1093\/bioinformatics\/17.5.419"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1145\/369133.369172"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2003.1238382"},{"key":"ref60","article-title":"An engine to auto generate optimized kernels for multi backends"},{"key":"ref61","first-page":"142","article-title":"Learning word vectors for sentiment analysis","volume-title":"Proceedings of the 49th annual meeting of the association for computational linguistics: Human language technologies","author":"Maas"},{"key":"ref62","article-title":"The wikitext long term dependency language modeling dataset","volume":"9","author":"Merity","year":"2016","journal-title":"Salesforce Metamind"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-demos.6"},{"key":"ref64","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume":"32","author":"Paszke","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref65","first-page":"28","article-title":"Cacti 6.0: A tool to model large caches","volume":"27","author":"Muralimanohar","year":"2009","journal-title":"HP laboratories"}],"event":{"name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","location":"Montreal, QC, Canada","start":{"date-parts":[[2023,2,25]]},"end":{"date-parts":[[2023,3,1]]}},"container-title":["2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10070856\/10070923\/10070997.pdf?arnumber=10070997","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,3]],"date-time":"2024-03-03T06:34:13Z","timestamp":1709447653000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10070997\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2]]},"references-count":65,"URL":"https:\/\/doi.org\/10.1109\/hpca56546.2023.10070997","relation":{},"subject":[],"published":{"date-parts":[[2023,2]]}}}