{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T22:23:25Z","timestamp":1775082205392,"version":"3.50.1"},"publisher-location":"Singapore","reference-count":20,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819584192","type":"print"},{"value":"9789819584208","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-8420-8_1","type":"book-chapter","created":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T20:13:56Z","timestamp":1775074436000},"page":"3-14","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["FbsPipe: Forward-Backward Separation Pipeline Parallelism Method for Deep Learning"],"prefix":"10.1007","author":[{"given":"Zhengyu","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fengzhe","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yanzhao","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xiaofeng","family":"Qi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xinyi","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shuaikang","family":"Hou","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,4,2]]},"reference":[{"key":"1_CR1","doi-asserted-by":"crossref","unstructured":"Xu, W., Zhang, Y., Tang, X.: Parallelizing DNN training on GPUs: challenges and opportunities. In: Companion Proceedings of the Web Conference 2021, pp. 174\u2013178. Association for Computing Machinery, New York (2021)","DOI":"10.1145\/3442442.3452055"},{"key":"1_CR2","doi-asserted-by":"crossref","unstructured":"Jang, I., Yang, Z., Zhang, Z., et al.: Oobleck: resilient distributed training of large models using pipeline templates. In: Proceedings of the 29th Symposium on Operating Systems Principles, pp. 382\u2013395. Association for Computing Machinery, New York (2023)","DOI":"10.1145\/3600006.3613152"},{"key":"1_CR3","doi-asserted-by":"crossref","unstructured":"Zhou, Q., Wang, H., Yu, X., et al.: MPress: democratizing billion-scale model training on multi-GPU servers via memory-saving inter-operator parallelism. In: 2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA), pp. 556\u2013569 (2023)","DOI":"10.1109\/HPCA56546.2023.10071077"},{"issue":"11","key":"1_CR4","first-page":"2912","volume":"42","author":"Y Yang","year":"2020","unstructured":"Yang, Y., Wu, Q.M.J., Feng, X., et al.: Recomputation of the dense layers for performance improvement of DCNN. IEEE Trans. Pattern Anal. Mach. Intell. 42(11), 2912\u20132925 (2020)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"1","key":"1_CR5","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1109\/MCAS.2024.3349669","volume":"24","author":"Y Hu","year":"2024","unstructured":"Hu, Y., Lin, X., Wang, H., et al.: Wafer-scale computing: advancements, challenges, and future perspectives [Feature]. IEEE Circ. Syst. Mag. 24(1), 52\u201381 (2024)","journal-title":"IEEE Circ. Syst. Mag."},{"key":"1_CR6","doi-asserted-by":"crossref","unstructured":"Kim, K., Jae, P.M.: Present and future, challenges of high bandwith memory (HBM). In: 2024 IEEE International Memory Workshop (IMW), pp. 1\u20134 (2024)","DOI":"10.1109\/IMW59701.2024.10536972"},{"key":"1_CR7","unstructured":"Jiang, Z., Lin, H., Zhong, Y., et al.: {MegaScale}: scaling large language model training to more than 10,000 {GPUs}. In: 21st USENIX Symposium on Networked Systems Design and Implementation (NSDI 24), pp. 745\u2013760 (2024)"},{"key":"1_CR8","doi-asserted-by":"crossref","unstructured":"Rasley, J., Rajbhandari, S., Ruwase, O., et al.: DeepSpeed: System optimizations enable training deep learning models with over 100 Billion parameters, In: Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 3505\u20133506. Association for Computing Machinery, New York (2020)s","DOI":"10.1145\/3394486.3406703"},{"issue":"4","key":"1_CR9","doi-asserted-by":"publisher","DOI":"10.1088\/2632-2153\/ad9667","volume":"5","author":"J Zhu","year":"2024","unstructured":"Zhu, J., Feng, P., Lu, J., et al.: ZeROf-Offload: forward-gradient scheme for efficient full parameter fine-tuning of billion-scale language models. Mach. Learn. Sci. Technol. 5(4), 045054 (2024)","journal-title":"Mach. Learn. Sci. Technol."},{"key":"1_CR10","doi-asserted-by":"crossref","unstructured":"Li, C., Awan, A.A., Tang, H., et al.: 1-bit LAMB: communication efficient large-scale large-batch training with LAMB\u2019s convergence speed. In: 2022 IEEE 29th International Conference on High Performance Computing, Data, and Analytics (HiPC), pp. 272\u2013281 (2022)","DOI":"10.1109\/HiPC56025.2022.00044"},{"key":"1_CR11","doi-asserted-by":"publisher","unstructured":"Aburass, S., Dorgham, O.: Performance evaluation of swin vision transformer model using gradient accumulation optimization technique. In: Arai, K. (eds.) Proceedings of the Future Technologies Conference (FTC) 2023, Volume 4. FTC 2023. Lecture Notes in Networks and Systems, vol. 816. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-47448-4_5","DOI":"10.1007\/978-3-031-47448-4_5"},{"key":"1_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.future.2024.107597","volume":"164","author":"H Li","year":"2025","unstructured":"Li, H., Wang, Z., Zhao, H., et al.: Convergence-aware optimal checkpointing for exploratory deep learning training jobs. Futur. Gener. Comput. Syst. 164, 107597 (2025)","journal-title":"Futur. Gener. Comput. Syst."},{"key":"1_CR13","unstructured":"Huang, Y., Cheng, Y., Bapna, A., et al.: GPipe: efficient training of giant neural networks using pipeline parallelism. In: Advances in Neural Information Processing Systems, vol. 32. Curran Associates, Inc. (2019)"},{"key":"1_CR14","doi-asserted-by":"crossref","unstructured":"Sun, Z., Cao, H., Wang, Y., et al.: Adapipe: optimizing pipeline parallelism with adaptive recomputation and partitioning. In: Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, vol. 3, pp. 86\u2013100 (2024)","DOI":"10.1145\/3620666.3651359"},{"key":"1_CR15","doi-asserted-by":"crossref","unstructured":"Narayanan, D., Harlap, A., Phanishayee, A., et al.: PipeDream: generalized pipeline parallelism for DNN training. In: Proceedings of the 27th ACM Symposium on Operating Systems Principles, pp. 1\u201315. Association for Computing Machinery, New York (2019)","DOI":"10.1145\/3341301.3359646"},{"key":"1_CR16","unstructured":"Narayanan, D., Phanishayee, A., Shi, K., et al.: Memory-efficient pipeline-parallel DNN training. In: Proceedings of the 38th International Conference on Machine Learning, pp. 7937\u20137947. . PMLR (2021)"},{"key":"1_CR17","doi-asserted-by":"crossref","unstructured":"Fan, S., Rong, Y., Meng, C., et al.: DAPPLE: a pipelined data parallel approach for training large models. In: Proceedings of the 26th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 431\u2013445. Association for Computing Machinery, New York (2021)","DOI":"10.1145\/3437801.3441593"},{"key":"1_CR18","doi-asserted-by":"crossref","unstructured":"Beaumont, O., Eyraud-Dubois, L., Shilova, A.: MadPipe: memory aware dynamic programming algorithm for pipelined model parallelism. In: 2022 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW), pp. 1063\u20131073 (2022)","DOI":"10.1109\/IPDPSW55747.2022.00174"},{"key":"1_CR19","unstructured":"Fang, J., Wang, H., Yang, Q., et al.: PALM: a efficient performance simulator for tiled accelerators with large-scale model training. arXiv (2024)"},{"key":"1_CR20","doi-asserted-by":"crossref","unstructured":"Liu, L., Liu, T., Jiang, B., et al.: Parmesan: efficient partitioning and mapping flow for DNN training on general device topology. IEEE Trans. Comput.-Aided Des. Integr. Circ. Syst. (2024)","DOI":"10.1109\/TCAD.2024.3368970"}],"container-title":["Lecture Notes in Computer Science","Algorithms and Architectures for Parallel Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-8420-8_1","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T20:13:59Z","timestamp":1775074439000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-8420-8_1"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819584192","9789819584208"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-8420-8_1","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 April 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICA3PP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Algorithms and Architectures for Parallel Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Zhengzhou","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2 November 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"25","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ica3pp2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/ieee-cybermatics.org\/2025\/ica3pp\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}