{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,29]],"date-time":"2026-03-29T06:06:06Z","timestamp":1774764366340,"version":"3.50.1"},"reference-count":62,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T00:00:00Z","timestamp":1736294400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T00:00:00Z","timestamp":1736294400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat Comput Sci"],"DOI":"10.1038\/s43588-024-00753-x","type":"journal-article","created":{"date-parts":[[2025,1,8]],"date-time":"2025-01-08T10:07:45Z","timestamp":1736330865000},"page":"13-26","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":22,"title":["Efficient scaling of large language models with mixture of experts and 3D analog in-memory computing"],"prefix":"10.1038","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9495-7150","authenticated-orcid":false,"given":"Julian","family":"B\u00fcchel","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0001-9081-6139","authenticated-orcid":false,"given":"Athanasios","family":"Vasilopoulos","sequence":"additional","affiliation":[]},{"given":"William Andrew","family":"Simon","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4255-8622","authenticated-orcid":false,"given":"Irem","family":"Boybat","sequence":"additional","affiliation":[]},{"given":"HsinYu","family":"Tsai","sequence":"additional","affiliation":[]},{"given":"Geoffrey 
W.","family":"Burr","sequence":"additional","affiliation":[]},{"given":"Hernan","family":"Castro","sequence":"additional","affiliation":[]},{"given":"Bill","family":"Filipiak","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1600-6151","authenticated-orcid":false,"given":"Manuel","family":"Le Gallo","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3141-4970","authenticated-orcid":false,"given":"Abbas","family":"Rahimi","sequence":"additional","affiliation":[]},{"given":"Vijay","family":"Narayanan","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5603-5243","authenticated-orcid":false,"given":"Abu","family":"Sebastian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,8]]},"reference":[{"key":"753_CR1","unstructured":"Jiang, A. Q. et al. Mixtral of experts. Preprint at https:\/\/arxiv.org\/abs\/2401.04088 (2024)."},{"key":"753_CR2","unstructured":"Touvron, H. et al. Llama 2: open foundation and fine-tuned chat models. Preprint at https:\/\/arxiv.org\/abs\/2307.09288 (2024)."},{"key":"753_CR3","unstructured":"Gemini Team Google et al. Gemini: a family of highly capable multimodal models. Preprint at https:\/\/arxiv.org\/abs\/2312.11805 (2023)."},{"key":"753_CR4","unstructured":"Brown, T. B. et al. Language models are few-shot learners. In Proc. Advances in Neural Information Processing Systems Vol. 33 (eds Larochelle, H. et al.) 1877\u20131901 (Curran Associates, 2020)."},{"key":"753_CR5","unstructured":"Kaplan, J. et al. Scaling laws for neural language models. Preprint at https:\/\/arxiv.org\/abs\/2001.08361 (2020)."},{"key":"753_CR6","unstructured":"Hoffmann, J. et al. An empirical analysis of compute-optimal large language model training. In Proc. Advances in Neural Information Processing Systems (eds Koyejo, S. et al.) Vol. 
35 (Curran Associates, 2022)."},{"key":"753_CR7","first-page":"11324","volume":"24","author":"A Chowdhery","year":"2023","unstructured":"Chowdhery, A. et al. PaLM: scaling language modeling with pathways. J. Mach. Learn. Res. 24, 11324\u201311436 (2023).","journal-title":"J. Mach. Learn. Res."},{"key":"753_CR8","doi-asserted-by":"publisher","unstructured":"Jordan, M. & Jacobs, R. Hierarchical mixtures of experts and the EM algorithm. In Proc. 1993 International Conference on Neural Networks (IJCNN-93-Nagoya, Japan) Vol. 2, 1339\u20131344 (IEEE, 1993); https:\/\/doi.org\/10.1109\/IJCNN.1993.716791","DOI":"10.1109\/IJCNN.1993.716791"},{"key":"753_CR9","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1162\/neco.1991.3.1.79","volume":"3","author":"RA Jacobs","year":"1991","unstructured":"Jacobs, R. A., Jordan, M. I., Nowlan, S. J. & Hinton, G. E. Adaptive mixtures of local experts. Neural Comput. 3, 79\u201387 (1991).","journal-title":"Neural Comput."},{"key":"753_CR10","unstructured":"Shazeer, N. et al. Outrageously large neural networks: the sparsely-gated mixture-of-experts layer. In Proc. International Conference on Learning Representations (ICLR, 2017); https:\/\/openreview.net\/forum?id=B1ckMDqlg"},{"key":"753_CR11","first-page":"5232","volume":"23","author":"W Fedus","year":"2022","unstructured":"Fedus, W., Zoph, B. & Shazeer, N. Switch transformers: scaling to trillion parameter models with simple and efficient sparsity. J. Mach. Learn. Res. 23, 5232\u20135270 (2022).","journal-title":"J. Mach. Learn. Res."},{"key":"753_CR12","first-page":"5485","volume":"21","author":"C Raffel","year":"2020","unstructured":"Raffel, C. et al. Exploring the limits of transfer learning with a unified text-to-text transformer. J. Mach. Learn. Res. 21, 5485\u20135551 (2020).","journal-title":"J. Mach. Learn. Res."},{"key":"753_CR13","unstructured":"Du, N. et al. GLaM: efficient scaling of language models with mixture-of-experts. In Proc. 
39th International Conference on Machine Learning, Proceedings of Machine Learning Research Vol. 162, 5547\u20135569 (PMLR, 2022)."},{"key":"753_CR14","unstructured":"Clark, A. et al. Unified scaling laws for routed language models. In Proc. 39th International Conference on Machine Learning Vol. 162 (eds Chaudhuri, K. et al.) 4057\u20134086 (PMLR, 2022)."},{"key":"753_CR15","unstructured":"Ludziejewski, J. et al. Scaling laws for fine-grained mixture of experts. In Proc. ICLR 2024 Workshop on Mathematical and Empirical Understanding of Foundation Models (PMLR, 2024); https:\/\/openreview.net\/forum?id=Iizr8qwH7J"},{"key":"753_CR16","doi-asserted-by":"publisher","unstructured":"Csord\u00e1s, R., Irie, K. & Schmidhuber, J. Approximating two-layer feedforward networks for efficient transformers. In Proc. Association for Computational Linguistics: EMNLP 2023 (eds Bouamor, H. et al.) 674\u2013692 (ACL, 2023); https:\/\/doi.org\/10.18653\/v1\/2023.findings-emnlp.49","DOI":"10.18653\/v1\/2023.findings-emnlp.49"},{"key":"753_CR17","doi-asserted-by":"publisher","unstructured":"Reuther, A. et al. AI and ML accelerator survey and trends. In Proc. 2022 IEEE High Performance Extreme Computing Conference (HPEC) 1\u201310 (IEEE, 2022); https:\/\/doi.org\/10.1109\/HPEC55821.2022.9926331","DOI":"10.1109\/HPEC55821.2022.9926331"},{"key":"753_CR18","doi-asserted-by":"publisher","first-page":"529","DOI":"10.1038\/s41565-020-0655-z","volume":"15","author":"A Sebastian","year":"2020","unstructured":"Sebastian, A., Le Gallo, M., Khaddam-Aljameh, R. & Eleftheriou, E. Memory devices and applications for in-memory computing. Nat. Nanotechnol. 15, 529\u2013544 (2020).","journal-title":"Nat. Nanotechnol."},{"key":"753_CR19","doi-asserted-by":"publisher","first-page":"eabj9979","DOI":"10.1126\/science.abj9979","volume":"376","author":"M Lanza","year":"2022","unstructured":"Lanza, M. et al. Memristive technologies for data storage, computation, encryption and radio-frequency communication. 
Science 376, eabj9979 (2022).","journal-title":"Science"},{"key":"753_CR20","doi-asserted-by":"publisher","first-page":"010902","DOI":"10.1063\/5.0136403","volume":"1","author":"P Mannocci","year":"2023","unstructured":"Mannocci, P. et al. In-memory computing with emerging memory devices: status and outlook. APL Mach. Learn. 1, 010902 (2023).","journal-title":"APL Mach. Learn."},{"key":"753_CR21","doi-asserted-by":"publisher","first-page":"286","DOI":"10.1038\/s44287-024-00037-6","volume":"1","author":"Y Huang","year":"2024","unstructured":"Huang, Y. et al. Memristor-based hardware accelerators for artificial intelligence. Nat. Rev. Electr. Eng. 1, 286\u2013299 (2024).","journal-title":"Nat. Rev. Electr. Eng."},{"key":"753_CR22","doi-asserted-by":"publisher","first-page":"680","DOI":"10.1038\/s41928-023-01010-1","volume":"6","author":"M Le Gallo","year":"2023","unstructured":"Le Gallo, M. et al. A 64-core mixed-signal in-memory compute chip based on phase-change memory for deep neural network inference. Nat. Electron. 6, 680\u2013693 (2023).","journal-title":"Nat. Electron."},{"key":"753_CR23","doi-asserted-by":"publisher","first-page":"768","DOI":"10.1038\/s41586-023-06337-5","volume":"620","author":"S Ambrogio","year":"2023","unstructured":"Ambrogio, S. et al. An analog-AI chip for energy-efficient speech recognition and transcription. Nature 620, 768\u2013775 (2023).","journal-title":"Nature"},{"key":"753_CR24","doi-asserted-by":"publisher","first-page":"504","DOI":"10.1038\/s41586-022-04992-8","volume":"608","author":"W Wan","year":"2022","unstructured":"Wan, W. et al. A compute-in-memory chip based on resistive random-access memory. Nature 608, 504\u2013512 (2022).","journal-title":"Nature"},{"key":"753_CR25","doi-asserted-by":"publisher","first-page":"1205","DOI":"10.1126\/science.ade3483","volume":"381","author":"W Zhang","year":"2023","unstructured":"Zhang, W. et al. Edge learning using a fully integrated neuro-inspired memristor chip. 
Science 381, 1205\u20131211 (2023).","journal-title":"Science"},{"key":"753_CR26","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1126\/science.adf5538","volume":"384","author":"T-H Wen","year":"2024","unstructured":"Wen, T.-H. et al. Fusion of memristor and digital compute-in-memory processing for energy-efficient edge computing. Science 384, 325\u2013332 (2024).","journal-title":"Science"},{"key":"753_CR27","doi-asserted-by":"publisher","unstructured":"Fick, L., Skrzyniarz, S., Parikh, M., Henry, M. B. & Fick, D. Analog matrix processor for edge AI real-time video analytics. In Proc. 2022 IEEE International Solid-State Circuits Conference (ISSCC) Vol. 65, 260\u2013262 (IEEE, 2022); https:\/\/doi.org\/10.1109\/ISSCC42614.2022.9731773","DOI":"10.1109\/ISSCC42614.2022.9731773"},{"key":"753_CR28","doi-asserted-by":"publisher","unstructured":"Arnaud, F. et al. High density embedded PCM cell in 28\u2009nm FDSOI technology for automotive micro-controller applications. In Proc. 2020 IEEE International Electron Devices Meeting (IEDM) 24.2.1\u201324.2.4 (IEEE, 2020); https:\/\/doi.org\/10.1109\/IEDM13553.2020.9371934","DOI":"10.1109\/IEDM13553.2020.9371934"},{"key":"753_CR29","doi-asserted-by":"publisher","unstructured":"Lee, S. et al. A 1\u2009Tb 4b\/cell 64-stacked-WL 3D NAND flash memory with 12\u2009MB\/s program throughput. In Proc. 2018 IEEE International Solid-State Circuits Conference (ISSCC) 340\u2013342 (IEEE, 2018); https:\/\/doi.org\/10.1109\/ISSCC.2018.8310323","DOI":"10.1109\/ISSCC.2018.8310323"},{"key":"753_CR30","doi-asserted-by":"publisher","unstructured":"Park, J.-W. et al. A 176-stacked 512\u2009Gb 3b\/cell 3D-NAND flash with 10.8\u2009Gb\/mm2 density with a peripheral circuit under cell array architecture. In Proc. 2021 IEEE International Solid-State Circuits Conference (ISSCC) Vol. 
64, 422\u2013423 (IEEE, 2021); https:\/\/doi.org\/10.1109\/ISSCC42613.2021.9365809","DOI":"10.1109\/ISSCC42613.2021.9365809"},{"key":"753_CR31","doi-asserted-by":"publisher","first-page":"571292","DOI":"10.3389\/fnins.2020.571292","volume":"14","author":"S-T Lee","year":"2020","unstructured":"Lee, S.-T. & Lee, J.-H. Neuromorphic computing using NAND flash memory architecture with pulse width modulation scheme. Front. Neurosci. 14, 571292 (2020).","journal-title":"Front. Neurosci."},{"key":"753_CR32","doi-asserted-by":"publisher","first-page":"014001","DOI":"10.1088\/2634-4386\/ac0775","volume":"1","author":"M Bavandpour","year":"2021","unstructured":"Bavandpour, M., Sahay, S., Mahmoodi, M. R. & Strukov, D. B. 3D-aCortex: an ultra-compact energy-efficient neurocomputing platform based on commercial 3D-NAND flash memories. Neuromorphic Comput. Eng. 1, 014001 (2021).","journal-title":"Neuromorphic Comput. Eng."},{"key":"753_CR33","doi-asserted-by":"publisher","first-page":"160","DOI":"10.1109\/LED.2020.3048101","volume":"42","author":"W Shim","year":"2020","unstructured":"Shim, W. & Yu, S. Technological design of 3D NAND-based compute-in-memory architecture for GB-scale deep neural network. IEEE Electron Device Lett. 42, 160\u2013163 (2020).","journal-title":"IEEE Electron Device Lett."},{"key":"753_CR34","doi-asserted-by":"publisher","unstructured":"Hsieh, C.-C. et al. Chip demonstration of a high-density (43\u2009Gb) and high-search-bandwidth (300\u2009Gb\/s) 3D NAND based in-memory search accelerator for Ternary Content Addressable Memory (TCAM) and proximity search of Hamming distance. In Proc. 
2023 IEEE Symposium on VLSI Technology and Circuits (VLSI Technology and Circuits) 1\u20132 (IEEE, 2023); https:\/\/doi.org\/10.23919\/VLSITechnologyandCir57934.2023.10185361","DOI":"10.23919\/VLSITechnologyandCir57934.2023.10185361"},{"key":"753_CR35","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1038\/s41928-022-00795-x","volume":"5","author":"Q Huo","year":"2022","unstructured":"Huo, Q. et al. A computing-in-memory macro based on three-dimensional resistive random-access memory. Nat. Electron. 5, 469\u2013477 (2022).","journal-title":"Nat. Electron."},{"key":"753_CR36","doi-asserted-by":"publisher","first-page":"114","DOI":"10.1109\/TVLSI.2022.3221390","volume":"31","author":"S Jain","year":"2023","unstructured":"Jain, S. et al. A heterogeneous and programmable compute-in-memory accelerator architecture for analog-AI using dense 2-D mesh. IEEE Trans. Very Large Scale Integr. VLSI Syst. 31, 114\u2013127 (2023).","journal-title":"IEEE Trans. Very Large Scale Integr. VLSI Syst."},{"key":"753_CR37","doi-asserted-by":"crossref","unstructured":"Cui, C. et al. A survey on multimodal large language models for autonomous driving. In Proc. IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV) Workshops 958\u2013979 (IEEE, 2024).","DOI":"10.1109\/WACVW60836.2024.00106"},{"key":"753_CR38","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.-W., Lee, K. & Toutanova, K. BERT: pre-training of deep bidirectional transformers for language understanding. In Proc. 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies Vol. 1, 4171\u20134186 (ACL, 2019); https:\/\/doi.org\/10.18653\/v1\/N19-1423","DOI":"10.18653\/v1\/N19-1423"},{"key":"753_CR39","unstructured":"Kim, W., Son, B. & Kim, I. ViLT: vision-and-language transformer without convolution or region supervision. In Proc. 38th International Conference on Machine Learning Vol. 139 (eds Meila, M. & Zhang, T.) 
5583\u20135594 (PMLR, 2021); https:\/\/proceedings.mlr.press\/v139\/kim21k.html"},{"key":"753_CR40","unstructured":"Alayrac, J.-B. et al. Flamingo: a visual language model for few-shot learning. In Proc. Advances in Neural Information Processing Systems Vol. 35 (eds Koyejo, S. et al.) 23716\u201323736 (Curran Associates, 2022); https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/960a172bc7fbf0177ccccbb411a7d800-Paper-Conference.pdf"},{"key":"753_CR41","unstructured":"Pope, R. et al. Efficiently scaling transformer inference. In Proc. Machine Learning and Systems Vol. 5 (eds Song, D. et al.) 606\u2013624 (Curran Associates, 2023); https:\/\/proceedings.mlsys.org\/paper_files\/paper\/2023\/file\/c4be71ab8d24cdfb45e3d06dbfca2780-Paper-mlsys2023.pdf"},{"key":"753_CR42","doi-asserted-by":"publisher","first-page":"29","DOI":"10.1109\/MM.2021.3061394","volume":"41","author":"J Choquette","year":"2021","unstructured":"Choquette, J., Gandhi, W., Giroux, O., Stam, N. & Krashinsky, R. NVIDIA A100 Tensor Core GPU: performance and innovation. IEEE Micro 41, 29\u201335 (2021).","journal-title":"IEEE Micro"},{"key":"753_CR43","unstructured":"Radford, A. et al. Language models are unsupervised multitask learners. Semantic Scholar https:\/\/api.semanticscholar.org\/CorpusID:160025533 (2019)."},{"key":"753_CR44","unstructured":"Merity, S., Xiong, C., Bradbury, J. & Socher, R. Pointer sentinel mixture models. In Proc. International Conference on Learning Representations (ICLR, 2017); https:\/\/openreview.net\/forum?id=Byj72udxe"},{"key":"753_CR45","doi-asserted-by":"publisher","first-page":"6279","DOI":"10.1109\/TED.2023.3321014","volume":"70","author":"A Vasilopoulos","year":"2023","unstructured":"Vasilopoulos, A. et al. Exploiting the state dependency of conductance variations in memristive devices for accurate in-memory computing. IEEE Trans. Electron Devices 70, 6279\u20136285 (2023).","journal-title":"IEEE Trans. 
Electron Devices"},{"key":"753_CR46","unstructured":"Paszke, A. et al. PyTorch: an imperative style, high-performance deep learning library. In Proc. Advances in Neural Information Processing Systems Vol. 32, 8024\u20138035 (Curran Associates, 2019)."},{"key":"753_CR47","unstructured":"Reed, J. K., DeVito, Z., He, H., Ussery, A. & Ansel, J. Torch.fx: practical program capture and transformation for deep learning in Python. Preprint at https:\/\/arxiv.org\/abs\/2112.08429 (2021)."},{"key":"753_CR48","unstructured":"Vaswani, A. et al. Attention is all you need. In Proc. Advances in Neural Information Processing Systems Vol. 30 (eds Guyon, I. et al.) (Curran Associates, 2017); https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"753_CR49","doi-asserted-by":"publisher","first-page":"478","DOI":"10.1109\/TC.1981.1675827","volume":"C-30","author":"Fisher","year":"1981","unstructured":"Fisher, J. A. Trace scheduling: a technique for global microcode compaction. IEEE Trans. Comput. C-30, 478\u2013490 (1981).","journal-title":"IEEE Trans. Comput."},{"key":"753_CR50","doi-asserted-by":"publisher","unstructured":"Bernstein, D. & Rodeh, M. Global instruction scheduling for superscalar machines. In Proc. ACM SIGPLAN 1991 Conference on Programming Language Design and Implementation PLDI \u201991 241\u2013255 (ACM, 1991); https:\/\/doi.org\/10.1145\/113445.113466","DOI":"10.1145\/113445.113466"},{"key":"753_CR51","doi-asserted-by":"publisher","DOI":"10.1038\/s41467-020-16108-9","volume":"11","author":"V Joshi","year":"2020","unstructured":"Joshi, V. et al. Accurate deep neural network inference using computational phase-change memory. Nat. Commun. 11, 2473 (2020).","journal-title":"Nat. Commun."},{"key":"753_CR52","doi-asserted-by":"crossref","unstructured":"Kudo, T. & Richardson, J. SentencePiece: a simple and language independent subword tokenizer and detokenizer for neural text processing. In Proc. 
2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations (eds Blanco, E. & Lu, W.) 66\u201371 (Association for Computational Linguistics, 2018).","DOI":"10.18653\/v1\/D18-2012"},{"key":"753_CR53","doi-asserted-by":"publisher","unstructured":"Tillet, P., Kung, H. T. & Cox, D. Triton: an intermediate language and compiler for tiled neural network computations. In Proc. 3rd ACM SIGPLAN International Workshop on Machine Learning and Programming Languages 10\u201319 (ACM, 2019); https:\/\/doi.org\/10.1145\/3315508.3329973","DOI":"10.1145\/3315508.3329973"},{"key":"753_CR54","doi-asserted-by":"publisher","first-page":"041102","DOI":"10.1063\/5.0168089","volume":"1","author":"M Le Gallo","year":"2023","unstructured":"Le Gallo, M. et al. Using the IBM analog in-memory hardware acceleration kit for neural network training and inference. APL Mach. Learn. 1, 041102 (2023).","journal-title":"APL Mach. Learn."},{"key":"753_CR55","unstructured":"B\u00fcchel, J. et al. AIHWKIT-lightning: a scalable HW-aware training toolkit for analog in-memory computing. In Proc. Advances in Neural Information Processing Systems 2024 Workshop, Machine Learning with new Compute Paradigms (Curran Associates, 2024); https:\/\/openreview.net\/forum?id=QNdxOgGmhR"},{"key":"753_CR56","doi-asserted-by":"publisher","unstructured":"B\u00fcchel, J. et al. Gradient descent-based programming of analog in-memory computing cores. In Proc. 2022 International Electron Devices Meeting (IEDM) 33.1.1\u201333.1.4 (IEEE, 2022); https:\/\/doi.org\/10.1109\/IEDM45625.2022.10019486","DOI":"10.1109\/IEDM45625.2022.10019486"},{"key":"753_CR57","doi-asserted-by":"publisher","unstructured":"B\u00fcchel, J. Source data for figures in \u2018Efficient scaling of large language models with mixture of experts and 3D analog in-memory computing\u2019. 
Zenodo https:\/\/doi.org\/10.5281\/zenodo.14146703 (2024).","DOI":"10.5281\/zenodo.14146703"},{"key":"753_CR58","doi-asserted-by":"publisher","unstructured":"B\u00fcchel, J. IBM\/analog-moe: code release. Zenodo https:\/\/doi.org\/10.5281\/zenodo.14025079 (2024).","DOI":"10.5281\/zenodo.14025079"},{"key":"753_CR59","doi-asserted-by":"publisher","unstructured":"B\u00fcchel, J. & Vasilopoulos, A. IBM\/3D-CiM-LLM-Inference-Simulator: code release. Zenodo https:\/\/doi.org\/10.5281\/zenodo.14025077 (2024).","DOI":"10.5281\/zenodo.14025077"},{"key":"753_CR60","doi-asserted-by":"publisher","first-page":"1373","DOI":"10.1109\/TED.2020.2968079","volume":"67","author":"A Goda","year":"2020","unstructured":"Goda, A. 3D NAND technology achievements and future scaling perspectives. IEEE Trans. Electron Devices 67, 1373\u20131381 (2020).","journal-title":"IEEE Trans. Electron Devices"},{"key":"753_CR61","doi-asserted-by":"publisher","unstructured":"Lacaita, A. L., Spinelli, A. S. & Compagnoni, C. M. High-density solid-state storage: a long path to success. In Proc. 2021 IEEE Latin America Electron Devices Conference (LAEDC) 1\u20134 (IEEE, 2021); https:\/\/doi.org\/10.1109\/LAEDC51812.2021.9437865","DOI":"10.1109\/LAEDC51812.2021.9437865"},{"key":"753_CR62","unstructured":"Shoeybi, M. et al. Megatron-LM: training multi-billion parameter language models using model parallelism. 
Preprint at https:\/\/arxiv.org\/abs\/1909.08053 (2020)."}],"container-title":["Nature Computational Science"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s43588-024-00753-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s43588-024-00753-x","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s43588-024-00753-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,26]],"date-time":"2025-02-26T15:46:31Z","timestamp":1740584791000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s43588-024-00753-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1,8]]},"references-count":62,"journal-issue":{"issue":"1","published-online":{"date-parts":[[2025,1]]}},"alternative-id":["753"],"URL":"https:\/\/doi.org\/10.1038\/s43588-024-00753-x","relation":{},"ISSN":["2662-8457"],"issn-type":[{"value":"2662-8457","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1,8]]},"assertion":[{"value":"27 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 November 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 January 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]}}