{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T01:06:58Z","timestamp":1773796018673,"version":"3.50.1"},"reference-count":51,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"6","license":[{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,6,1]],"date-time":"2025-06-01T00:00:00Z","timestamp":1748736000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62402282"],"award-info":[{"award-number":["62402282"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Parallel Distrib. Syst."],"published-print":{"date-parts":[[2025,6]]},"DOI":"10.1109\/tpds.2025.3557444","type":"journal-article","created":{"date-parts":[[2025,4,3]],"date-time":"2025-04-03T18:58:29Z","timestamp":1743706709000},"page":"1115-1129","source":"Crossref","is-referenced-by-count":1,"title":["Cube-fx: Mapping Taylor Expansion Onto Matrix Multiplier-Accumulators of Huawei Ascend AI Processors"],"prefix":"10.1109","volume":"36","author":[{"ORCID":"https:\/\/orcid.org\/0009-0009-0921-4930","authenticated-orcid":false,"given":"Yifeng","family":"Tang","sequence":"first","affiliation":[{"name":"Department of Computer Science, University of Hong Kong, Hong Kong, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6421-664X","authenticated-orcid":false,"given":"Huaman","family":"Zhou","sequence":"additional","affiliation":[{"name":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9767-2767","authenticated-orcid":false,"given":"Zhuoran","family":"Ji","sequence":"additional","affiliation":[{"name":"School of Cyber Science and Technology, Shandong University, Qingdao, Shandong, China"}]},{"given":"Cho-Li","family":"Wang","sequence":"additional","affiliation":[{"name":"Department of Computer Science, University of Hong Kong, Hong Kong, China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Toward theoretical understanding of deep learning","author":"Arora","year":"2018"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01578"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01176"},{"key":"ref4","article-title":"Zoology: Measuring and improving recall in efficient language models","author":"Arora","year":"2023"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ACSSC.2003.1292370"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ASAP.2002.1030725"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPSW.2012.177"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.33039\/ami.2021.03.004"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/HCS55958.2022.9895534"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"ref11","article-title":"AMD instinct MI300X accelerators","year":"2023"},{"key":"ref12","article-title":"NVIDIA H100 tensor core GPU architecture","year":"2023"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ISSCC42613.2021.9366056"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/HOTCHIPS.2019.8875654"},{"key":"ref15","article-title":"Ascend-910b","year":"2023"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3079856.3080246"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1002\/spe.3214"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/MM.2022.3176529"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/PMBS56514.2022.00018"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS53621.2022.00117"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00071"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3330345.3331057"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ISLPED58423.2023.10244461"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2022.3152217"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/3514221.3517869"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511804441"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/9780470544402"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR.2018.8545362"},{"key":"ref29","article-title":"Intel advanced matrix extensions (Intel AMX)","year":"2023"},{"key":"ref30","article-title":"The scalable matrix extension (SME), for Armv9-A","year":"2023"},{"key":"ref31","article-title":"Apple unveils the new macbook pro featuring the M3 family of chips","year":"2023"},{"key":"ref32","article-title":"Power10: Driving performance and reducing energy consumption","year":"2023"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA51647.2021.00071"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1145\/3604802"},{"key":"ref35","article-title":"AMD CDNA-3-white-paper","year":"2023"},{"key":"ref36","first-page":"33","article-title":"Accelerating sparse matrix-matrix multiplication with the ascend AI core","volume-title":"Proc. Workshop Accelerated Mach. Learn.","author":"Moustafa","year":"2023"},{"key":"ref37","article-title":"Atlas 200 AI accelerator module 1.0.0 application software development guide","year":"2020"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1145\/1460361.1460365"},{"issue":"163","key":"ref40","article-title":"Evaluation of polynomials and evaluation of rational functions","volume":"61","author":"Motzkin","year":"1955","journal-title":"Bull. Amer. Math. Soc."},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS47924.2020.00071"},{"key":"ref42","article-title":"Cambricon BANG C developer guide","year":"2021"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/INFCOM.1998.665104"},{"key":"ref44","article-title":"Intel AVX-512 instructions","year":"2017"},{"key":"ref45","article-title":"Arm neon","year":"2009"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/TEC.1959.5222693"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS45731.2020.9180864"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.7717\/peerj-cs.103"},{"key":"ref49","article-title":"SIMD instruction set","year":"2019"},{"key":"ref50","article-title":"Fast polynomial multiplication using matrix multiplication accelerators with applications to NTRU on Apple M1\/M3 socs","volume-title":"IACR Cryptol. ePrint Arch.","volume":"2024","author":"Filho","year":"2024"},{"key":"ref51","doi-asserted-by":"crossref","DOI":"10.62056\/a6n59qgxq","article-title":"Efficient isochronous fixed-weight sampling with applications to NTRU","volume-title":"IACR Cryptol. ePrint Arch.","author":"Filho","year":"2024"}],"container-title":["IEEE Transactions on Parallel and Distributed Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/71\/10969501\/10948365.pdf?arnumber=10948365","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,18]],"date-time":"2025-04-18T17:38:58Z","timestamp":1744997938000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10948365\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,6]]},"references-count":51,"journal-issue":{"issue":"6"},"URL":"https:\/\/doi.org\/10.1109\/tpds.2025.3557444","relation":{},"ISSN":["1045-9219","1558-2183","2161-9883"],"issn-type":[{"value":"1045-9219","type":"print"},{"value":"1558-2183","type":"electronic"},{"value":"2161-9883","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,6]]}}}