{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,26]],"date-time":"2026-04-26T05:40:02Z","timestamp":1777182002878,"version":"3.51.4"},"reference-count":12,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"am","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,3,1]],"date-time":"2025-03-01T00:00:00Z","timestamp":1740787200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100000185","name":"Defense Advanced Research Projects Agency","doi-asserted-by":"publisher","award":["4202290027"],"award-info":[{"award-number":["4202290027"]}],"id":[{"id":"10.13039\/100000185","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CAREER-2144766"],"award-info":[{"award-number":["CAREER-2144766"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["PPoSS-2217099"],"award-info":[{"award-number":["PPoSS-2217099"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100008982","name":"National Science Foundation","doi-asserted-by":"publisher","award":["CNS-2211382"],"award-info":[{"award-number":["CNS-2211382"]}],"id":[{"id":"10.13039\/501100008982","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Micro"],"published-print":{"date-parts":[[2025,3]]},"DOI":"10.1109\/mm.2025.3540663","type":"journal-article","created":{"date-parts":[[2025,2,13]],"date-time":"2025-02-13T18:38:14Z","timestamp":1739471894000},"page":"18-25","source":"Crossref","is-referenced-by-count":2,"title":["Spine-Free Networks for Large Language Model Training"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7693-9925","authenticated-orcid":false,"given":"Weiyang","family":"Wang","sequence":"first","affiliation":[{"name":"Massachusetts Institute of Technology Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4095-1519","authenticated-orcid":false,"given":"Manya","family":"Ghobadi","sequence":"additional","affiliation":[{"name":"Massachusetts Institute of Technology Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA"}]}],"member":"263","reference":[{"key":"ref1","article-title":"The Llama 3 herd of models,","author":"Grattafiori","year":"2024"},{"key":"ref2","article-title":"Doubling all2all performance with NVIDIA collective communication library 2.12,","volume-title":"Nvidia","author":"Mandakolathur","year":"2022"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/2934872.2934908"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1145\/3458817.3476209"},{"key":"ref5","volume-title":"NVIDIA DGX SuperPOD","year":"2024"},{"key":"ref6","first-page":"18,332","article-title":"DeepSpeed-MoE: Advancing mixture-of-experts inference and training to power next-generation AI scale,","volume-title":"Proc. Mach. Learn. Res.","volume":"162","author":"Rajbhandari","year":"2022"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1145\/3098822.3098838"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3579371.3589350"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1145\/3581784.3607102"},{"key":"ref10","article-title":"Reducing activation recomputation in large transformer models,","author":"Korthikanti","year":"2022"},{"key":"ref11","article-title":"TopoOpt: Co-optimizing network topology and parallelization strategy for distributed training jobs","author":"Wang","year":"2023"},{"key":"ref12","article-title":"NVIDIA Docs Hub: QM9700\/QM9790.","volume-title":"Nvidia","year":"2023"}],"container-title":["IEEE Micro"],"original-title":[],"link":[{"URL":"https:\/\/ieeexplore.ieee.org\/ielam\/40\/10989057\/10884699-aam.pdf","content-type":"application\/pdf","content-version":"am","intended-application":"syndication"},{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/40\/10989057\/10884699.pdf?arnumber=10884699","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,8]],"date-time":"2025-05-08T17:38:39Z","timestamp":1746725919000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10884699\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3]]},"references-count":12,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/mm.2025.3540663","relation":{},"ISSN":["0272-1732","1937-4143"],"issn-type":[{"value":"0272-1732","type":"print"},{"value":"1937-4143","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3]]}}}