{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T12:40:01Z","timestamp":1759236001268,"version":"3.44.0"},"reference-count":62,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"10","license":[{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,1]],"date-time":"2025-10-01T00:00:00Z","timestamp":1759276800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"National Key Research and Development Program of China","award":["2022YFB4501600"],"award-info":[{"award-number":["2022YFB4501600"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U22A2028","62222214","62341411","62102398","62102399","62302478","62302482","62302483","62302480","62302481","62372436"],"award-info":[{"award-number":["U22A2028","62222214","62341411","62102398","62102399","62302478","62302482","62302483","62302480","62302481","62372436"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Strategic Priority Research Program of the Chinese Academy of Sciences","award":["XDB0660200","XDB0660201","XDB0660202"],"award-info":[{"award-number":["XDB0660200","XDB0660201","XDB0660202"]}]},{"name":"CAS Project for Young Scientists in Basic Research","award":["YSBR-029"],"award-info":[{"award-number":["YSBR-029"]}]},{"DOI":"10.13039\/501100002367","name":"Youth Innovation Promotion Association CAS","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100002367","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Comput.-Aided Des. Integr. Circuits Syst."],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1109\/tcad.2025.3553074","type":"journal-article","created":{"date-parts":[[2025,3,21]],"date-time":"2025-03-21T15:18:36Z","timestamp":1742570316000},"page":"3962-3975","source":"Crossref","is-referenced-by-count":0,"title":["SaaP: Rearchitect SoC-as-a-Processor to Orchestrate Hardware Heterogeneity"],"prefix":"10.1109","volume":"44","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8267-9824","authenticated-orcid":false,"given":"Pengwei","family":"Jin","sequence":"first","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhe","family":"Fan","sequence":"additional","affiliation":[{"name":"Cambricon Technologies, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5503-4457","authenticated-orcid":false,"given":"Yongwei","family":"Zhao","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7603-4210","authenticated-orcid":false,"given":"Zidong","family":"Du","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Hongrui","family":"Guo","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ziyuan","family":"Nan","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9823-2573","authenticated-orcid":false,"given":"Yifan","family":"Hao","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-2215-4892","authenticated-orcid":false,"given":"Chongxiao","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-4204-0686","authenticated-orcid":false,"given":"Tianyun","family":"Ma","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhenxing","family":"Zhang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7748-7967","authenticated-orcid":false,"given":"Xiaqing","family":"Li","sequence":"additional","affiliation":[{"name":"Department of Computer Science and Technology, Beijing Jiaotong University, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-0237-1034","authenticated-orcid":false,"given":"Wei","family":"Li","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9979-0561","authenticated-orcid":false,"given":"Xing","family":"Hu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2530-5874","authenticated-orcid":false,"given":"Qi","family":"Guo","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1480-7265","authenticated-orcid":false,"given":"Zhiwei","family":"Xu","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Processors, Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7601-0753","authenticated-orcid":false,"given":"Tianshi","family":"Chen","sequence":"additional","affiliation":[{"name":"Cambricon Technologies, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"volume-title":"Apple A15","year":"2021","key":"ref1"},{"volume-title":"Jetson AGX xavier new era autonomous machines","year":"2024","key":"ref2"},{"volume-title":"A new golden age for computer architecture","year":"2019","key":"ref3"},{"volume-title":"NVIDIA\u2019S XAVIER SOC","year":"2018","key":"ref4"},{"key":"ref5","first-page":"265","article-title":"TensorFlow: A system for large-scale machine learning","volume-title":"Proc. 12th USENIX Symp. Oper. Syst. Design Implement. (OSDI)","author":"Abadi"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750386"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1002\/cpe.1631"},{"issue":"1","key":"ref8","doi-asserted-by":"crossref","first-page":"85","DOI":"10.1016\/S0045-7825(97)00183-7","article-title":"Large-scale simulation of elastic wave propagation in heterogeneous media on parallel computers","volume":"152","author":"Bao","year":"1998","journal-title":"Comput. Methods Appl. Mech. Eng."},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3093337.3037738"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/3317550.3321441"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3173162.3173177"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2012.58"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00033"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA45697.2020.00044"},{"key":"ref15","article-title":"MXNet: A flexible and efficient machine learning library for heterogeneous distributed systems","author":"Chen","year":"2015","journal-title":"arXiv:1512.01274"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/2541940.2541967"},{"key":"ref17","first-page":"367","article-title":"Eyeriss: A spatial architecture for energy-efficient dataflow for convolutional neural networks","volume-title":"Proc. ACM\/IEEE 43rd Annu. Int. Symp. Comput. Archit. (ISCA)","author":"Chen"},{"key":"ref18","first-page":"1","article-title":"Flashattention: Fast and memory-efficient exact attention with IO-awareness","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NeurIPS)","author":"Dao"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750389"},{"issue":"1","key":"ref20","doi-asserted-by":"crossref","first-page":"209","DOI":"10.1109\/TC.2020.3044245","article-title":"Breaking the interaction wall: A DLPU-centric deep learning computing system","volume":"71","author":"Du","year":"2022","journal-title":"IEEE Trans. Comput."},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2010.13"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD.2014.6974710"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/40.612211"},{"key":"ref24","first-page":"922","article-title":"AWB-GCN: A graph convolutional network accelerator with runtime workload rebalancing","volume-title":"Proc. 53rd Annu. IEEE\/ACM Int. Symp. Microarchit. (MICRO)","author":"Geng"},{"key":"ref25","first-page":"1","article-title":"Matrix capsules with EM routing","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Hinton"},{"volume-title":"Kirin 970","year":"2024","key":"ref26"},{"volume-title":"Apple A11","year":"2017","key":"ref27"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2015.102"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783722"},{"key":"ref30","first-page":"1","article-title":"Semi-supervised classification with graph convolutional networks","volume-title":"Proc. 5th Int. Conf. Learn. Represent.","author":"Kipf"},{"key":"ref31","first-page":"81","article-title":"Single-ISA heterogeneous multi-core architectures: The potential for processor power reduction","volume-title":"Proc. 22nd Digit. Avionics Syst. Conf. Process.","author":"Kumar"},{"key":"ref32","first-page":"775","article-title":"GCNAX: A flexible and energy-efficient accelerator for graph convolutional neural networks","volume-title":"Proc. IEEE Int. Symp. High-Perform. Comput. Archit. (HPCA)","author":"Li"},{"key":"ref33","first-page":"1","article-title":"LightRNN: Memory and computation-efficient recurrent neural networks","volume-title":"Proc. Adv. Neural Inf. Process. Syst. (NIPS)","author":"Li"},{"key":"ref34","first-page":"1","article-title":"E-RNN: Design optimization for efficient recurrent neural networks in FPGAs","volume-title":"Proc. IEEE Int. Symp. High Perform. Comput. Archit. (HPCA)","author":"Li"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/MC.2013.294"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TRO.2015.2463671"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2016.7783748"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1145\/2749469.2750382"},{"key":"ref39","first-page":"163","article-title":"DISC: Dynamic instruction stream computer","volume-title":"Proc. 24th Annu. Int. Symp. Microarchit.","author":"Nemirovsky"},{"volume-title":"CUDA, release: 10.2.89.","year":"2020","key":"ref40"},{"volume-title":"NVIDIA tesla v100 GPU architecture","year":"2017","key":"ref41"},{"key":"ref42","first-page":"8024","article-title":"PyTorch: An imperative style, high-performance deep learning library","volume-title":"Advances in Neural Information Processing Systems","volume":"32","author":"Paszke","year":"2019"},{"key":"ref43","article-title":"YOLOv3: An incremental improvement","author":"Redmon","year":"2018","journal-title":"arXiv:1804.02767"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA.2018.00047"},{"volume-title":"The next-level processor for the mobile future","year":"2025","key":"ref45"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/ISVLSI.2014.94"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1145\/3183713.3183735"},{"key":"ref48","first-page":"414","article-title":"Multiscalar processors","volume-title":"Proc. 22nd Annu. Int. Symp. Comput. Archit.","author":"Sohi"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2018.00052"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/TPDS.2015.2430861"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/s11265-015-1058-5"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/2744769.2744902"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/71.993206"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA.2019.00026"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1145\/2872362.2872408"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.1145\/2678373.2665692"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TC.2014.2315628"},{"volume-title":"A brief guide of xPU for AI accelerators","year":"2018","author":"Xie","key":"ref58"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358318"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO.2014.60"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1145\/3307650.3322226"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1145\/3352460.3358256"}],"container-title":["IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/43\/11155107\/10935670.pdf?arnumber=10935670","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,30]],"date-time":"2025-09-30T12:21:32Z","timestamp":1759234892000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10935670\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10]]},"references-count":62,"journal-issue":{"issue":"10"},"URL":"https:\/\/doi.org\/10.1109\/tcad.2025.3553074","relation":{},"ISSN":["0278-0070","1937-4151"],"issn-type":[{"type":"print","value":"0278-0070"},{"type":"electronic","value":"1937-4151"}],"subject":[],"published":{"date-parts":[[2025,10]]}}}