{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T05:05:14Z","timestamp":1750309514507,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":23,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,6,23]],"date-time":"2024-06-23T00:00:00Z","timestamp":1719100800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"DOE","award":["DE-SC0024576 DE-SC0024428"],"award-info":[{"award-number":["DE-SC0024576 DE-SC0024428"]}]},{"name":"DARPA","award":["HR00112420004 FA8750-23-2-0501"],"award-info":[{"award-number":["HR00112420004 FA8750-23-2-0501"]}]},{"DOI":"10.13039\/100000001","name":"National Science Foundation","doi-asserted-by":"publisher","award":["2408925 2404036"],"award-info":[{"award-number":["2408925 2404036"]}],"id":[{"id":"10.13039\/100000001","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,6,23]]},"DOI":"10.1145\/3649329.3658253","type":"proceedings-article","created":{"date-parts":[[2024,11,7]],"date-time":"2024-11-07T19:27:22Z","timestamp":1731007642000},"page":"1-6","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["On the Design of Novel Attention Mechanism for Enhanced Efficiency of Transformers"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0354-2940","authenticated-orcid":false,"given":"Sumit Kumar","family":"Jha","sequence":"first","affiliation":[{"name":"Computer Science Department, Florida International University, Miami, FL, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5983-9095","authenticated-orcid":false,"given":"Susmit","family":"Jha","sequence":"additional","affiliation":[{"name":"Computer Science Laboratory, SRI International, Menlo Park, CA, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4183-6926","authenticated-orcid":false,"given":"Rickard","family":"Ewetz","sequence":"additional","affiliation":[{"name":"Electrical and Computer Engineering, University of Central Florida, Orlando, FL, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6757-105X","authenticated-orcid":false,"given":"Alvaro","family":"Velasquez","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Colorado Boulder, Boulder, CO, United States"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,11,7]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Automation & Test in Europe Conference (DATE). IEEE, 786--791","author":"Chen Gang","year":"2020","unstructured":"Gang Chen, Shengyu He, Haitao Meng, and Kai Huang. 2020. Phonebit: Efficient gpu-accelerated binary neural network inference engine for mobile phones. In 2020 Design, Automation & Test in Europe Conference (DATE). IEEE, 786--791."},{"key":"e_1_3_2_1_2_1","unstructured":"Krzysztof Choromanski Valerii Likhosherstov David Dohan Xingyou Song Andreea Gane Tamas Sarlos Peter Hawkins Jared Davis Afroz Mohiuddin Lukasz Kaiser et al. 2020. Rethinking attention with performers. arXiv preprint arXiv:2009.14794 (2020)."},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530451"},{"volume-title":"Low-Power Computer Vision","author":"Gholami Amir","key":"e_1_3_2_1_4_1","unstructured":"Amir Gholami, Sehoon Kim, Zhen Dong, Zhewei Yao, Michael W Mahoney, and Kurt Keutzer. 2022. A survey of quantization methods for efficient neural network inference. In Low-Power Computer Vision. Chapman and Hall\/CRC, 291--326."},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSII.2018.2821678"},{"key":"e_1_3_2_1_6_1","unstructured":"IEA. 2023. Data centres & networks. https:\/\/www.iea.org\/energy-system\/buildings\/data-centres-and-data-transmission-networks. [Accessed 2023]."},{"volume-title":"Towards automated system synthesis using sciduction","author":"Jha Susmit Kumar","key":"e_1_3_2_1_7_1","unstructured":"Susmit Kumar Jha. 2011. Towards automated system synthesis using sciduction. University of California, Berkeley."},{"key":"e_1_3_2_1_8_1","first-page":"319","article-title":"Computation of boolean formulas using sneak paths in crossbar computing","volume":"9","author":"Jha Sumit Kumar","year":"2016","unstructured":"Sumit Kumar Jha, Dilia E Rodriguez, Joseph E Van Nostrand, and Alvaro Velasquez. 2016. Computation of boolean formulas using sneak paths in crossbar computing. US Patent 9,319,047.","journal-title":"US Patent"},{"key":"e_1_3_2_1_9_1","volume-title":"Tinybert: Distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351","author":"Jiao Xiaoqi","year":"2019","unstructured":"Xiaoqi Jiao, Yichun Yin, Lifeng Shang, Xin Jiang, Xiao Chen, Linlin Li, Fang Wang, and Qun Liu. 2019. Tinybert: Distilling bert for natural language understanding. arXiv preprint arXiv:1909.10351 (2019)."},{"key":"e_1_3_2_1_10_1","volume-title":"Algorithm-Software-Hardware Co-Design for Deep Learning Acceleration. In 2023 60th ACM\/IEEE Design Automation Conference (DAC). IEEE, 1--4.","author":"Li Zhengang","year":"2023","unstructured":"Zhengang Li, Yanyue Xie, Peiyan Dong, Olivia Chen, and Yanzhi Wang. 2023. Algorithm-Software-Hardware Co-Design for Deep Learning Acceleration. In 2023 60th ACM\/IEEE Design Automation Conference (DAC). IEEE, 1--4."},{"key":"e_1_3_2_1_11_1","volume-title":"The Era of 1-bit LLMs: All Large Language Models are in 1.58 Bits. arXiv:2402.17764","author":"Ma Shuming","year":"2024","unstructured":"Shuming Ma, Hongyu Wang, Lingxiao Ma, Lei Wang, Wenhui Wang, Shaohan Huang, Li Dong, Ruiping Wang, Jilong Xue, and Furu Wei. 2024. The Era of 1-bit LLMs: All Large Language Models are in 1.58 Bits. arXiv:2402.17764 (2024)."},{"key":"e_1_3_2_1_12_1","first-page":"961","article-title":"Design and fabrication of flow-based edge detection memristor crossbar circuits","volume":"67","author":"Pannu Jodh Singh","year":"2020","unstructured":"Jodh Singh Pannu, Sunny Raj, Steven Lawrence Fernandes, Dwaipayan Chakraborty, Sarah Rafiq, Nathaniel Cady, and Sumit Kumar Jha. 2020. Design and fabrication of flow-based edge detection memristor crossbar circuits. IEEE Transactions on Circuits and Systems II: Express Briefs 67, 5 (2020), 961--965.","journal-title":"IEEE Transactions on Circuits and Systems II: Express Briefs"},{"key":"e_1_3_2_1_13_1","volume-title":"Carbon emissions and large neural network training. arXiv preprint arXiv:2104.10350","author":"Patterson David","year":"2021","unstructured":"David Patterson, Joseph Gonzalez, Quoc Le, Chen Liang, Lluis-Miquel Munguia, Daniel Rothchild, David So, Maud Texier, and Jeff Dean. 2021. Carbon emissions and large neural network training. arXiv preprint arXiv:2104.10350 (2021)."},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530585"},{"key":"e_1_3_2_1_15_1","volume-title":"Megatron-lm: Training multi-billion parameter language models using model parallelism. arXiv preprint arXiv:1909.08053","author":"Shoeybi Mohammad","year":"2019","unstructured":"Mohammad Shoeybi, Mostofa Patwary, Raul Puri, Patrick LeGresley, Jared Casper, and Bryan Catanzaro. 2019. Megatron-lm: Training multi-billion parameter language models using model parallelism. arXiv preprint arXiv:1909.08053 (2019)."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/DAC18074.2021.9586134"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/MSE.2007.44"},{"key":"e_1_3_2_1_18_1","volume-title":"Energy and policy considerations for deep learning in NLP. arXiv preprint arXiv:1906.02243","author":"Strubell Emma","year":"2019","unstructured":"Emma Strubell, Ananya Ganesh, and Andrew McCallum. 2019. Energy and policy considerations for deep learning in NLP. arXiv preprint arXiv:1906.02243 (2019)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1145\/3489517.3530618"},{"key":"e_1_3_2_1_20_1","volume-title":"Bitnet: Scaling 1-bit transformers for large language models. arXiv preprint arXiv:2310.11453","author":"Wang Hongyu","year":"2023","unstructured":"Hongyu Wang, Shuming Ma, Li Dong, Shaohan Huang, Huaijie Wang, Lingxiao Ma, Fan Yang, Ruiping Wang, Yi Wu, and Furu Wei. 2023. Bitnet: Scaling 1-bit transformers for large language models. arXiv preprint arXiv:2310.11453 (2023)."},{"key":"e_1_3_2_1_21_1","volume-title":"Linformer: Self-attention with linear complexity. arXiv:2006.04768","author":"Wang Sinong","year":"2020","unstructured":"Sinong Wang, Belinda Z Li, Madian Khabsa, Han Fang, and Hao Ma. 2020. Linformer: Self-attention with linear complexity. arXiv:2006.04768 (2020)."},{"key":"e_1_3_2_1_22_1","first-page":"311","article-title":"On the singular values of a product of matrices","volume":"74","author":"Watkins William","year":"1970","unstructured":"William Watkins. 1970. On the singular values of a product of matrices. Journal of Research, National Bureau of Standards: Mathematics and mathematical physics. Section B 74, 4 (1970), 311.","journal-title":"Journal of Research, National Bureau of Standards: Mathematics and mathematical physics. Section B"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICPADS51040.2020.00026"}],"event":{"name":"DAC '24: 61st ACM\/IEEE Design Automation Conference","sponsor":["SIGDA ACM Special Interest Group on Design Automation","IEEE-CEDA","SIGBED ACM Special Interest Group on Embedded Systems"],"location":"San Francisco CA USA","acronym":"DAC '24"},"container-title":["Proceedings of the 61st ACM\/IEEE Design Automation Conference"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3658253","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649329.3658253","content-type":"application\/pdf","content-version":"vor","intended-application":"syndication"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3649329.3658253","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:18:01Z","timestamp":1750295881000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3649329.3658253"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,23]]},"references-count":23,"alternative-id":["10.1145\/3649329.3658253","10.1145\/3649329"],"URL":"https:\/\/doi.org\/10.1145\/3649329.3658253","relation":{},"subject":[],"published":{"date-parts":[[2024,6,23]]},"assertion":[{"value":"2024-11-07","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}