{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T21:42:45Z","timestamp":1773265365007,"version":"3.50.1"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2026,1,19]],"date-time":"2026-01-19T00:00:00Z","timestamp":1768780800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,1,19]],"date-time":"2026-01-19T00:00:00Z","timestamp":1768780800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026,1,19]]},"DOI":"10.1109\/asp-dac66049.2026.11420504","type":"proceedings-article","created":{"date-parts":[[2026,3,10]],"date-time":"2026-03-10T19:51:15Z","timestamp":1773172275000},"page":"311-317","source":"Crossref","is-referenced-by-count":0,"title":["DeepPiC: xPU-PIM Cluster Architecture with Adaptive Resource-Aware Task Orchestration for DeepSeek-Style MoE Inference"],"prefix":"10.1109","author":[{"given":"Zixu","family":"Li","sequence":"first","affiliation":[{"name":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"}]},{"given":"Manni","family":"Li","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"}]},{"given":"Zijian","family":"Huang","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"}]},{"given":"Jiayu","family":"Yang","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"}]},{"given":"Wending","family":"Zhao","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"}]},{"given":"Yinyin","family":"Lin","sequence":"additional","affiliation":[{"name":"Fudan University,State Key Laboratory of Integrated Chips and Systems School of Microelectronics,Shanghai,China"}]},{"given":"Chengchen","family":"Wang","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Mobile Multimedia Technology ZTE Corporation,Shenzhen,China"}]},{"given":"Haidong","family":"Tian","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Mobile Multimedia Technology ZTE Corporation,Shenzhen,China"}]},{"given":"Xiankui","family":"Xiong","sequence":"additional","affiliation":[{"name":"State Key Laboratory of Mobile Multimedia Technology ZTE Corporation,Shenzhen,China"}]}],"member":"263","reference":[{"key":"ref1","article-title":"DeepSeek-V3 Technical Report","author":"DeepSeek-AI","year":"2025"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3695053.3731412"},{"key":"ref3","article-title":"Attention Is All You Need","author":"Vaswani","year":"2023"},{"key":"ref4","article-title":"Cost effective LLM inference solution using SK hynix\u2019s AiM (Accelerator-in-Memory)","author":"Kwon","year":"2023","journal-title":"SC23"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/MICRO50266.2020.00040"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/isscc42613.2021.9365862"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ISCA52012.2021.00013"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/JSSC.2022.3200718"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/HCS55958.2022.9895629"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/HCS59251.2023.10254717"},{"key":"ref11","article-title":"Samsung PIM\/PNM for Transformer based AI: Energy Efficiency on PIM\/PNM Cluster","author":"Kim","year":"2023","journal-title":"Hot Chips"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607102"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/HPCA61900.2025.00102"},{"key":"ref14","first-page":"961","article-title":"SmartMoE: Efficiently training SparselyActivated models through combining offline and online parallelization","volume-title":"2023 USENIX Annual Technical Conference (USENIX ATC 23).","author":"Zhai"},{"key":"ref15","article-title":"FastMoE: A Fast Mixture-of-Expert Training System","author":"He","year":"2021"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651324"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1145\/3620665.3640422"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1145\/3620666.3651380"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TCAD.2025.3555184"},{"key":"ref20","article-title":"GitHub - deepseek-ai\/DeepEP: DeepEP: An efficient expert-parallel communication library","year":"2025","journal-title":"Deepseek-Ai"},{"key":"ref21","article-title":"NVIDIA DGX A100 The Universal System for AI Infrastructure","year":"2020","journal-title":"NVIDIA"},{"key":"ref22","article-title":"Dgx-2\/2h system","year":"2018","journal-title":"NVIDIA"},{"key":"ref23","article-title":"NVIDIA DGX B200 Datasheet","year":"2024","journal-title":"NVIDIA"},{"key":"ref24","article-title":"Serving Large Language Models on Huawei CloudMatrix384","author":"Zuo","year":"2025"},{"key":"ref25","article-title":"NVIDIA H200 Tensor Core GPU Supercharging AI and HPC workloads","year":"2024","journal-title":"NVIDIA"},{"key":"ref26","article-title":"Calculon-Ai\/Calculon","year":"2025","journal-title":"Georgia Institute of Technology and NVIDIA"},{"key":"ref27","article-title":"CMU-SAFARI\/ramulator2","author":"ETH Zurich","year":"2025"},{"key":"ref28","article-title":"Scale-snu\/attacc_simulator","year":"2025"}],"event":{"name":"2026 31st Asia and South Pacific Design Automation Conference (ASP-DAC)","location":"Lantau, Hong Kong","start":{"date-parts":[[2026,1,19]]},"end":{"date-parts":[[2026,1,22]]}},"container-title":["2026 31st Asia and South Pacific Design Automation Conference (ASP-DAC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11420221\/11420229\/11420504.pdf?arnumber=11420504","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T05:37:18Z","timestamp":1773207438000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11420504\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,1,19]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/asp-dac66049.2026.11420504","relation":{},"subject":[],"published":{"date-parts":[[2026,1,19]]}}}