{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,19]],"date-time":"2026-06-19T02:17:18Z","timestamp":1781835438387,"version":"3.54.5"},"reference-count":24,"publisher":"Springer Science and Business Media LLC","issue":"6","license":[{"start":{"date-parts":[[2020,8,11]],"date-time":"2020-08-11T00:00:00Z","timestamp":1597104000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,8,11]],"date-time":"2020-08-11T00:00:00Z","timestamp":1597104000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Sign Process Syst"],"published-print":{"date-parts":[[2021,6]]},"DOI":"10.1007\/s11265-020-01582-7","type":"journal-article","created":{"date-parts":[[2020,8,11]],"date-time":"2020-08-11T09:30:36Z","timestamp":1597138236000},"page":"605-615","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["AxSA: On the Design of High-Performance and Power-Efficient Approximate Systolic Arrays for Matrix Multiplication"],"prefix":"10.1007","volume":"93","author":[{"given":"Haroon","family":"Waris","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chenghua","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8398-8648","authenticated-orcid":false,"given":"Weiqiang","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Fabrizio","family":"Lombardi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,8,11]]},"reference":[{"issue":"2","key":"1582_CR1","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1109\/MSSC.2014.2385965","volume":"7","author":"RH Dennard","year":"2015","unstructured":"Dennard, R.H. (2015). Past progress and future challenges in LSI technology: from DRAM and scaling to ultra-low-power CMOS. IEEE Solid-State Circuits Magazine, 7(2), 29\u201338.","journal-title":"IEEE Solid-State Circuits Magazine"},{"issue":"2","key":"1582_CR2","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1109\/MDAT.2016.2573586","volume":"34","author":"A Pedram","year":"2017","unstructured":"Pedram, A., Richardson, S., Horowitz, M., Galal, S., & Kvatinsky, S. (2017). Dark memory and accelerator-rich system optimization in the dark silicon era. IEEE Design & Test, 34(2), 39\u201350.","journal-title":"IEEE Design & Test"},{"issue":"3","key":"1582_CR3","doi-asserted-by":"publisher","first-page":"394","DOI":"10.1109\/JPROC.2020.2975695","volume":"108","author":"W Liu","year":"2020","unstructured":"Liu, W., Lombardi, F., & Shulte, M. (2020). A retrospective and prospective view of approximate computing [point of view]. Proceedings of the IEEE, 108(3), 394\u2013399.","journal-title":"Proceedings of the IEEE"},{"issue":"1","key":"1582_CR4","doi-asserted-by":"publisher","first-page":"71","DOI":"10.1109\/78.365287","volume":"43","author":"AE Yagle","year":"1995","unstructured":"Yagle, A.E. (1995). Fast algorithms for matrix multiplication using pseudo-number-theoretic-transforms. IEEE Transactions on Signal Processing, 43(1), 71\u201376.","journal-title":"IEEE Transactions on Signal Processing"},{"issue":"8","key":"1582_CR5","doi-asserted-by":"publisher","first-page":"705","DOI":"10.1109\/71.706044","volume":"9","author":"K Li","year":"1998","unstructured":"Li, K., Pan, Y., & Zheng, S.Q. (1998). Fast and processor efficient parallel matrix multiplication algorithms on a linear array with a reconfigurable pipelined bus system. IEEE Transactions on Parallel and Distributed Systems, 9(8), 705\u2013720.","journal-title":"IEEE Transactions on Parallel and Distributed Systems"},{"key":"1582_CR6","doi-asserted-by":"crossref","unstructured":"Cohn, H., Kleinberg, R., Szegedy, B., & Umans, C. (2005). Group-theoretic algorithms for matrix multiplication. IEEE International Symposium Foundation of Computer Science, pp. 379\u2013388.","DOI":"10.1109\/SFCS.2005.39"},{"issue":"17","key":"1582_CR7","doi-asserted-by":"publisher","first-page":"640","DOI":"10.1016\/j.ipl.2013.05.011","volume":"113","author":"J Oh","year":"2013","unstructured":"Oh, J., Kim, J., & Moon, B.-R. (2013). On the inequivalence of bilinear algorithms for 3\u00d73 matrix multiplication. Information Processing Letters, 113(17), 640\u2013645.","journal-title":"Information Processing Letters"},{"issue":"11","key":"1582_CR8","doi-asserted-by":"publisher","first-page":"989","DOI":"10.1109\/TC.1986.1676700","volume":"35","author":"PJ Varman","year":"1986","unstructured":"Varman, P.J., & Ramakrishnan, I.V. (1986). Synthesis of an optimal family of matrix multiplication algorithms on linear arrays. IEEE Transactions on Computers, 35(11), 989\u2013996.","journal-title":"IEEE Transactions on Computers"},{"issue":"1","key":"1582_CR9","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1109\/43.3127","volume":"7","author":"SW Chan","year":"1988","unstructured":"Chan, S.W., & Wey, C.L. (1988). The design of concurrent error diagnosable sytolic arrays for band matrix multiplications. IEEE Transactions on Computer Aided Design, 7(1), 21\u201337.","journal-title":"IEEE Transactions on Computer Aided Design"},{"issue":"1","key":"1582_CR10","doi-asserted-by":"publisher","first-page":"27","DOI":"10.1049\/iet-cds.2013.0117","volume":"8","author":"P Saha","year":"2014","unstructured":"Saha, P., Banerjee, A., Bhattacharyya, P., & Dandapat, A. (2014). Improved matrix multiplier design for high-speed digital signal processing applications. IET Circuits, Devices and Systems, 8(1), 27\u201337.","journal-title":"IET Circuits, Devices and Systems"},{"issue":"1","key":"1582_CR11","doi-asserted-by":"publisher","first-page":"37","DOI":"10.1109\/MC.1982.1653825","volume":"15","author":"HT Kung","year":"1982","unstructured":"Kung, H.T. (1982). Why systolic architectures? Computer, 15(1), 37\u201346.","journal-title":"Computer"},{"issue":"7","key":"1582_CR12","doi-asserted-by":"publisher","first-page":"1614","DOI":"10.1109\/TVLSI.2019.2903289","volume":"27","author":"J Pan","year":"2019","unstructured":"Pan, J., Lee, C., Sghaier, A., Zeghid, M., & Xie, J. (2019). Novel systolization of subquadratic space complexity multipliers based on toeplitz matrix\u2013vector product approach. IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 27(7), 1614\u20131622.","journal-title":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems"},{"key":"1582_CR13","doi-asserted-by":"crossref","unstructured":"Montagne, E., & Sur\u00f3s, R. (2019). Systolic sparse matrix vector multiply in the age of TPUs and accelerators. Spring Simulation Conference (SpringSim), pp. 1\u201310.","DOI":"10.23919\/SpringSim.2019.8732860"},{"issue":"2","key":"1582_CR14","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1109\/LCA.2019.2924007","volume":"18","author":"G Shomron","year":"2019","unstructured":"Shomron, G., Horowitz, T., & Weiser U. (2019). SMT-SA: simultaneous multithreading in systolic arrays. IEEE Computer Architecture Letters, 18(2), 99\u2013102.","journal-title":"IEEE Computer Architecture Letters"},{"key":"1582_CR15","doi-asserted-by":"crossref","unstructured":"Olsen, E.B. (2018). RNS hardware matrix multiplier for high precision neural network acceleration: RNS TPU. IEEE International Symposium on Circuits and Systems (ISCAS), pp. 1\u20135.","DOI":"10.1109\/ISCAS.2018.8351352"},{"key":"1582_CR16","doi-asserted-by":"crossref","unstructured":"Cong, J., & Wang, J. (2018). Automatic interior I\/O elimination in systolic array architecture. IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM), pp. 228.","DOI":"10.1109\/FCCM.2018.00062"},{"key":"1582_CR17","doi-asserted-by":"crossref","unstructured":"Jouppi, N.P. (2017). In-datacenter performance analysis of a tensor processing unit. ACM\/IEEE 44th Annual International Symposium on Computer Architecture (ISCA), pp. 1\u201312.","DOI":"10.1145\/3079856.3080246"},{"key":"1582_CR18","doi-asserted-by":"crossref","unstructured":"Waris, H., Wang, C., Liu, W., & Lombardi, F. (2019). Design and evaluation of a power-efficient approximate systolic array architecture for matrix multiplication. IEEE International Workshop on Signal Processing Systems (SiPS), pp. 13\u201318.","DOI":"10.1109\/SiPS47522.2019.9020404"},{"key":"1582_CR19","doi-asserted-by":"crossref","unstructured":"Chen, K., Lombardi, F., & Han, J. (2015). Matrix multiplication by an inexact systolic array. IEEE Symposium on Nanoscale Architectures (NANOARCH), pp. 151\u2013156.","DOI":"10.1109\/NANOARCH.2015.7180604"},{"issue":"6","key":"1582_CR20","doi-asserted-by":"publisher","first-page":"20190043","DOI":"10.1587\/elex.16.20190043","volume":"16","author":"H Waris","year":"2019","unstructured":"Waris, H., Wang, C., & Liu, W. (2019). High-performance approximate half and full adder cells using NAND logic gate. IEICE Electronics Express, 16(6), 20190043.","journal-title":"IEICE Electronics Express"},{"issue":"7","key":"1582_CR21","doi-asserted-by":"publisher","first-page":"692","DOI":"10.1109\/12.863039","volume":"49","author":"W-C Yeh","year":"2000","unstructured":"Yeh, W.-C., & Jen, C.-W. (2000). High-speed booth encoded parallel multiplier design. IEEE Transactions on Computers, 49(7), 692\u2013701.","journal-title":"IEEE Transactions on Computers"},{"key":"1582_CR22","doi-asserted-by":"publisher","DOI":"10.1002\/9780470974681","volume-title":"Digital Design of Signal Processing Systems: a Practical Approach","author":"SA Khan","year":"2011","unstructured":"Khan, S.A. (2011). Digital Design of Signal Processing Systems: a Practical Approach. New York: Wiley."},{"issue":"9","key":"1582_CR23","doi-asserted-by":"publisher","first-page":"1760","DOI":"10.1109\/TC.2012.146","volume":"62","author":"J Liang","year":"2013","unstructured":"Liang, J., Han, J., & Lombardi, F. (2013). New metrics for the reliability of approximate and probabilistic adders. IEEE Transactions on Computers, 62(9), 1760\u20131771.","journal-title":"IEEE Transactions on Computers"},{"issue":"1","key":"1582_CR24","doi-asserted-by":"publisher","first-page":"168","DOI":"10.1109\/TCSVT.2013.2276862","volume":"24","author":"PK Meher","year":"2014","unstructured":"Meher, P.K., Park, S.Y., Mohanty, B.K., Lim, K.S., & Yeo, C. (2014). Efficient Integer DCT Architectures for HEVC. IEEE Transactions on Circuits and Systems for Video Technology, 24(1), 168\u2013178.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"}],"container-title":["Journal of Signal Processing Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-020-01582-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11265-020-01582-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11265-020-01582-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2021,8,11]],"date-time":"2021-08-11T00:00:19Z","timestamp":1628640019000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11265-020-01582-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8,11]]},"references-count":24,"journal-issue":{"issue":"6","published-print":{"date-parts":[[2021,6]]}},"alternative-id":["1582"],"URL":"https:\/\/doi.org\/10.1007\/s11265-020-01582-7","relation":{},"ISSN":["1939-8018","1939-8115"],"issn-type":[{"value":"1939-8018","type":"print"},{"value":"1939-8115","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,8,11]]},"assertion":[{"value":"4 April 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 April 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 July 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 August 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}