{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T16:41:39Z","timestamp":1773247299320,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":24,"publisher":"ACM","license":[{"start":{"date-parts":[[2025,1,20]],"date-time":"2025-01-20T00:00:00Z","timestamp":1737331200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"funder":[{"name":"NSFC","award":["U21B2030"],"award-info":[{"award-number":["U21B2030"]}]},{"name":"NSFC","award":["92264204"],"award-info":[{"award-number":["92264204"]}]},{"DOI":"10.13039\/501100012166","name":"National Key Research and Development Program of China","doi-asserted-by":"publisher","award":["2019YFA0706100"],"award-info":[{"award-number":["2019YFA0706100"]}],"id":[{"id":"10.13039\/501100012166","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,1,20]]},"DOI":"10.1145\/3658617.3697570","type":"proceedings-article","created":{"date-parts":[[2025,3,4]],"date-time":"2025-03-04T14:32:21Z","timestamp":1741098741000},"page":"642-647","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["3D-METRO: Deploy Large-Scale Transformer Model on A Chip Using Transistor-Less 3D-Metal-ROM-Based Compute-in-Memory Macro"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-1408-5194","authenticated-orcid":false,"given":"Yiming","family":"Chen","sequence":"first","affiliation":[{"name":"Tsinghua Univ., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2939-9564","authenticated-orcid":false,"given":"Xirui","family":"Du","sequence":"additional","affiliation":[{"name":"Tsinghua Univ., Beijing, 
China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1315-9806","authenticated-orcid":false,"given":"Guodong","family":"Yin","sequence":"additional","affiliation":[{"name":"Tsinghua Univ., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9994-8736","authenticated-orcid":false,"given":"Wenjun","family":"Tang","sequence":"additional","affiliation":[{"name":"Tsinghua Univ., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4892-2309","authenticated-orcid":false,"given":"Yongpan","family":"Liu","sequence":"additional","affiliation":[{"name":"Tsinghua Univ., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2421-353X","authenticated-orcid":false,"given":"Huazhong","family":"Yang","sequence":"additional","affiliation":[{"name":"Tsinghua Univ., Beijing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-8051-3345","authenticated-orcid":false,"given":"Xueqing","family":"Li","sequence":"additional","affiliation":[{"name":"Tsinghua Univ., Beijing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,3,4]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Dec.","author":"Vaswani A.","year":"2017","unstructured":"A. Vaswani et al., \"Attention Is All You Need,\" arXiv:1706.03762 [cs], Dec. 2017, Accessed: Dec. 01, 2021. [Online]."},{"key":"e_1_3_2_1_2_1","volume-title":"Accessed","author":"Dosovitskiy A.","year":"2022","unstructured":"A. Dosovitskiy et al., \"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale,\" arXiv:2010.11929 [cs], Jun. 2021, Accessed: Mar. 14, 2022. [Online]. Available: http:\/\/arxiv.org\/abs\/2010.11929"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-81-322-3972-7"},{"key":"e_1_3_2_1_4_1","volume-title":"Apr. 20","author":"Zhu D.","year":"2023","unstructured":"D. Zhu, J. Chen, X. Shen, X. Li, and M. Elhoseiny, \"MiniGPT-4: Enhancing Vision-Language Understanding with Advanced Large Language Models.\" arXiv, Apr. 20, 2023. [Online]. 
Available: http:\/\/arxiv.org\/abs\/2304.10592"},{"key":"e_1_3_2_1_5_1","volume-title":"Oct. 16, 2021. Accessed","author":"Hu E. J.","year":"2023","unstructured":"E. J. Hu et al., \"LoRA: Low-Rank Adaptation of Large Language Models.\" arXiv, Oct. 16, 2021. Accessed: Feb. 20, 2023."},{"issue":"1","key":"e_1_3_2_1_6_1","first-page":"217","article-title":"CONV-SRAM: An Energy-Efficient SRAM With In-Memory Dot-Product Computation for Low-Power Convolutional Neural Networks","volume":"54","author":"Biswas A.","year":"2019","unstructured":"A. Biswas and A. P. Chandrakasan, \"CONV-SRAM: An Energy-Efficient SRAM With In-Memory Dot-Product Computation for Low-Power Convolutional Neural Networks,\" JSSC, vol. 54, no. 1, pp. 217--230, Jan. 2019.","journal-title":"JSSC"},{"key":"e_1_3_2_1_7_1","volume-title":"ISSCC 2022","author":"Tu F.","year":"2022","unstructured":"F. Tu et al., \"A 28nm 29.2TFLOPS\/W BF16 and 36.5TOPS\/W INT8 Reconfigurable Digital CIM Processor with Unified FP\/INT Pipeline and Bitwise In-Memory Booth Multiplication for Cloud Deep Learning Acceleration,\" in ISSCC 2022, San Francisco, CA, USA: IEEE, Feb. 2022."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/LSSC.2021.3092727"},{"key":"e_1_3_2_1_9_1","volume-title":"16.4 An 89TOPS\/W and 16.3TOPS\/mm 2 All-Digital SRAM-Based Full-Precision Compute-In Memory Macro in 22nm for Machine-Learning Edge Applications,\" in ISSCC '21","author":"Chih Y.-D.","unstructured":"Y.-D. 
Chih et al., \"16.4 An 89TOPS\/W and 16.3TOPS\/mm 2 All-Digital SRAM-Based Full-Precision Compute-In Memory Macro in 22nm for Machine-Learning Edge Applications,\" in ISSCC '21, San Francisco, CA, USA."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2023.3326955"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530576"},{"key":"e_1_3_2_1_13_1","volume-title":"ACM","author":"Chen Y.","year":"2022","unstructured":"Y. Chen et al., \"Hidden-ROM: A Compute-in-ROM Architecture to Deploy Large-Scale Neural Networks on Chip with Flexible and Scalable Post-Fabrication Task Transfer Capability,\" in ICCAD '22, ACM, Oct. 2022."},{"key":"e_1_3_2_1_14_1","volume-title":"Jun. 08","author":"Kim S.","year":"2021","unstructured":"S. Kim, A. Gholami, Z. Yao, M. W. Mahoney, and K. Keutzer, \"I-BERT: Integer-only BERT Quantization.\" arXiv, Jun. 08, 2021."},{"key":"e_1_3_2_1_15_1","volume-title":"Mar. 25, 2022. Accessed","author":"Tang H.","year":"2023","unstructured":"H. Tang, X. Zhang, K. Liu, J. Zhu, and Z. Kang, \"MKQ-BERT: Quantized BERT with 4-bits Weights and Activations.\" arXiv, Mar. 25, 2022. Accessed: Nov. 17, 2023. [Online]. Available: http:\/\/arxiv.org\/abs\/2203.13483"},{"key":"e_1_3_2_1_16_1","volume-title":"May 24, 2019. Accessed","author":"Devlin J.","year":"2023","unstructured":"J. Devlin, M.-W. Chang, K. Lee, and K. Toutanova, \"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding.\" arXiv, May 24, 2019. Accessed: Nov. 17, 2023."},{"issue":"9","key":"e_1_3_2_1_17_1","doi-asserted-by":"crossref","first-page":"2817","DOI":"10.1109\/JSSC.2021.3073254","article-title":"A Local Computing Cell and 6T SRAM-Based Computing-inMemory Macro With 8-b MAC Operation for Edge AI Chips","volume":"56","author":"Si X.","year":"2021","unstructured":"X. 
Si et al., \"A Local Computing Cell and 6T SRAM-Based Computing-inMemory Macro With 8-b MAC Operation for Edge AI Chips,\" IEEE J. SolidState Circuits, vol. 56, no. 9, pp. 2817--2831, Sep. 2021.","journal-title":"IEEE J. SolidState Circuits"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247934"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2022.3213542"},{"key":"e_1_3_2_1_20_1","volume-title":"Nov. 09","author":"Pope R.","year":"2022","unstructured":"R. Pope et al., \"Efficiently Scaling Transformer Inference.\" arXiv, Nov. 09, 2022. Accessed: Nov. 17, 2023. [Online]."},{"key":"e_1_3_2_1_21_1","volume-title":"Jun. 01, 2022. Accessed","author":"Chen T.","year":"2023","unstructured":"T. Chen et al., \"THE-X: Privacy-Preserving Transformer Inference with Homomorphic Encryption.\" arXiv, Jun. 01, 2022. Accessed: Nov. 17, 2023."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSSC.2015.2418155"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/TVLSI.2014.2334635"},{"key":"e_1_3_2_1_24_1","volume-title":"Apr. 03, 2016. Accessed","author":"Zhang X.","year":"2023","unstructured":"X. Zhang, J. Zhao, and Y. LeCun, \"Character-level Convolutional Networks for Text Classification.\" arXiv, Apr. 03, 2016. Accessed: Nov. 
20, 2023."}],"event":{"name":"ASPDAC '25: 30th Asia and South Pacific Design Automation Conference","location":"Tokyo Japan","acronym":"ASPDAC '25","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEICE","IPSJ","IEEE CAS","IEEE CEDA"]},"container-title":["Proceedings of the 30th Asia and South Pacific Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658617.3697570","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3658617.3697570","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,18]],"date-time":"2025-06-18T23:44:19Z","timestamp":1750290259000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3658617.3697570"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,20]]},"references-count":24,"alternative-id":["10.1145\/3658617.3697570","10.1145\/3658617"],"URL":"https:\/\/doi.org\/10.1145\/3658617.3697570","relation":{},"subject":[],"published":{"date-parts":[[2025,1,20]]},"assertion":[{"value":"2025-03-04","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}