{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T06:10:07Z","timestamp":1743747007037,"version":"3.40.3"},"reference-count":30,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,4,1]],"date-time":"2025-04-01T00:00:00Z","timestamp":1743465600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"IITP","award":["RS-2024-00397359","RS-2024-00397359"],"award-info":[{"award-number":["RS-2024-00397359","RS-2024-00397359"]}]},{"DOI":"10.13039\/501100003708","name":"Korea Institute of Science and Technology Information","doi-asserted-by":"crossref","award":["K25L1M2C2-01","K25L1M2C2-01"],"award-info":[{"award-number":["K25L1M2C2-01","K25L1M2C2-01"]}],"id":[{"id":"10.13039\/501100003708","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-025-07169-y","type":"journal-article","created":{"date-parts":[[2025,4,3]],"date-time":"2025-04-03T19:48:29Z","timestamp":1743709709000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Pipeline parallelism with reduced network communication for efficient compute-intensive neural network training"],"prefix":"10.1007","volume":"81","author":[{"given":"Chanhee","family":"Yu","sequence":"first","affiliation":[]},{"given":"Kyongseok","family":"Park","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,4,1]]},"reference":[{"key":"7169_CR1","unstructured":"Gholami A (2023) Ai and memory wall. Riselab . https:\/\/medium.com\/riselab\/ai-and-memory-wall-2cb4265cb0b8"},{"key":"7169_CR2","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1016\/j.agrformet.2018.10.013","volume":"264","author":"S Madec","year":"2019","unstructured":"Madec S, Jin X, Lu H, De Solan B, Liu S, Duyme F, Heritier E, Baret F (2019) Ear density estimation from high resolution rgb imagery using deep learning technique. Agric For Meteorol 264:225\u2013234. https:\/\/doi.org\/10.1016\/j.agrformet.2018.10.013","journal-title":"Agric For Meteorol"},{"key":"7169_CR3","doi-asserted-by":"crossref","unstructured":"Xu Y, Xie Z, Feng Y, Chen Z (2018) Road extraction from high-resolution remote sensing imagery using deep learning. Remote Sens 10(9)","DOI":"10.3390\/rs10091461"},{"key":"7169_CR4","unstructured":"Dean J, Corrado G, Monga R, Chen K, Devin M, Mao M, Ranzato Ma, Senior A, Tucker P, Yang K, Le Q, Ng A (2012) Large scale distributed deep networks. In: Advances in Neural Information Processing Systems, vol. 25"},{"key":"7169_CR5","unstructured":"Huang Y, Cheng Y, Bapna A, Firat O, Chen D, Chen M, Lee H, Ngiam J, Le QV, Wu Y, Chen Z (2019) GPipe: Efficient training of giant neural networks using pipeline parallelism. In: Advances in Neural Information Processing Systems, vol. 32"},{"key":"7169_CR6","doi-asserted-by":"publisher","unstructured":"Fan S, Rong Y, Meng C, Cao Z, Wang S, Zheng Z, Wu C, Long G, Yang J, Xia L, Diao L, Liu X, Lin W (2021) DAPPLE: A pipelined data parallel approach for training large models. In: Proceedings of the 26th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming, pp. 431\u2013445 https:\/\/doi.org\/10.1145\/3437801.3441593","DOI":"10.1145\/3437801.3441593"},{"key":"7169_CR7","doi-asserted-by":"publisher","unstructured":"Yu C, Park K (2023) Accelerated synchronous model parallelism using cooperative process for training compute-intensive models. IEEE Access 11:74914\u201374923 https:\/\/doi.org\/10.1109\/ACCESS.2023.3296609","DOI":"10.1109\/ACCESS.2023.3296609"},{"key":"7169_CR8","unstructured":"Redmon J, Farhadi A (2018) YOLOv3: an incremental improvement"},{"key":"7169_CR9","doi-asserted-by":"publisher","unstructured":"Narayanan D, Harlap A, Phanishayee A, Seshadri V, Devanur NR, Ganger GR, Gibbons PB, Zaharia M (2019) PipeDream: generalized pipeline parallelism for dnn training. In: Proceedings of the 27th ACM Symposium on Operating Systems Principles, pp 1\u201315. https:\/\/doi.org\/10.1145\/3341301.3359646","DOI":"10.1145\/3341301.3359646"},{"key":"7169_CR10","unstructured":"Narayanan D, Phanishayee A, Shi K, Chen X, Zaharia M (2021) Memory-efficient pipeline-parallel dnn training. In: Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 139, pp 7937\u20137947. https:\/\/proceedings.mlr.press\/v139\/narayanan21a.html"},{"key":"7169_CR11","unstructured":"Yang B, Zhang J, Li J, Re C, Aberger C, De\u00a0Sa C (2021) PipeMare: asynchronous pipeline parallel dnn training. In: Proceedings of Machine Learning and Systems, vol. 3, pp 269\u2013296"},{"key":"7169_CR12","unstructured":"Guan L, Yin W, Li D, Lu X (2020) XPipe: efficient pipeline model parallelism for Multi-GPU DNN training"},{"key":"7169_CR13","doi-asserted-by":"crossref","unstructured":"Li M, Andersen DG, Park JW, Smola AJ, Ahmed A, Josifovski V, Long J, Shekita EJ, Su B-Y (2014) Scaling distributed machine learning with the parameter server. In: 11th USENIX Symposium on Operating Systems Design and Implementation (OSDI 14), pp 583\u2013598","DOI":"10.1145\/2640087.2644155"},{"key":"7169_CR14","unstructured":"Sergeev A, Balso MD (2018) Horovod: fast and easy distributed deep learning in TensorFlow"},{"key":"7169_CR15","unstructured":"Blot M, Picard D, Cord M, Thome N (2016) Gossip training for deep learning"},{"key":"7169_CR16","unstructured":"Ho Q, Cipar J, Cui H, Kim JK, Lee S, Gibbons PB, Gibson GA, Ganger GR, Xing EP (2013) More effective distributed ml via a stale synchronous parallel parameter server. In: Advances in Neural Information Processing Systems, vol. 1"},{"key":"7169_CR17","doi-asserted-by":"publisher","unstructured":"Zhao X, An A, Liu J, Chen B (2019) Dynamic stale synchronous parallel distributed training for deep learning. In: 2019 IEEE 39th International Conference on Distributed Computing Systems (ICDCS), pp 1507\u20131517. IEEE Computer Society, Los Alamitos, CA, USA. https:\/\/doi.org\/10.1109\/ICDCS.2019.00150","DOI":"10.1109\/ICDCS.2019.00150"},{"key":"7169_CR18","doi-asserted-by":"publisher","unstructured":"Li S, Hoefler T (2021) Chimera: Efficiently training large-scale neural networks with bidirectional pipelines. In: Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis. https:\/\/doi.org\/10.1145\/3458817.3476145","DOI":"10.1145\/3458817.3476145"},{"key":"7169_CR19","doi-asserted-by":"publisher","unstructured":"Cui L, Qu Z, Zhang G, Tang B, Ye B (2023) A bidirectional dnn partition mechanism for efficient pipeline parallel training in cloud. J Cloud Comput 12(1). https:\/\/doi.org\/10.1186\/s13677-022-00382-7","DOI":"10.1186\/s13677-022-00382-7"},{"key":"7169_CR20","doi-asserted-by":"publisher","unstructured":"Duan Y, Lai Z, Li S, Liu W, Ge K, Liang P, Li D (2022) HPH: hybrid parallelism on heterogeneous clusters for accelerating large-scale dnns training. In: 2022 IEEE International Conference on Cluster Computing, pp. 313\u2013323. https:\/\/doi.org\/10.1109\/CLUSTER51413.2022.00043","DOI":"10.1109\/CLUSTER51413.2022.00043"},{"key":"7169_CR21","unstructured":"Shoeybi M, Patwary M, Puri R, LeGresley P, Casper J, Catanzaro B (2020) Megatron-LM: training multi-billion parameter language models using model parallelism"},{"key":"7169_CR22","unstructured":"Li Z, Zhuang S, Guo S, Zhuo D, Zhang H, Song D, Stoica I (2021) TeraPipe: token-level pipeline parallelism for training large-scale language models. In: Proceedings of the 38th International Conference on Machine Learning. Proceedings of Machine Learning Research, vol. 139, pp 6543\u20136552"},{"key":"7169_CR23","unstructured":"Chen T, Xu B, Zhang C, Guestrin C (2016) Training deep nets with sublinear memory cost"},{"key":"7169_CR24","doi-asserted-by":"publisher","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: 2016 IEEE Conference on Computer Vision and Pattern Recognition, pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90","DOI":"10.1109\/CVPR.2016.90"},{"key":"7169_CR25","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. In: 3rd International Conference on Learning Representations, pp 1\u201314. Computational and Biological Learning Society"},{"issue":"1","key":"7169_CR26","doi-asserted-by":"publisher","first-page":"49","DOI":"10.1177\/1094342005051521","volume":"19","author":"R Thakur","year":"2005","unstructured":"Thakur R, Rabenseifner R, Gropp W (2005) Optimization of collective communication operations in mpich. Int J High Perform Comput Appl 19(1):49\u201366. https:\/\/doi.org\/10.1177\/1094342005051521","journal-title":"Int J High Perform Comput Appl"},{"key":"7169_CR27","doi-asserted-by":"publisher","unstructured":"Nuriyev E, Rico-Gallego J-A, Lastovetsky A (2022) Model-based selection of optimal mpi broadcast algorithms for multi-core clusters. J Parallel Distrib Comput. 165:1\u201316. https:\/\/doi.org\/10.1016\/j.jpdc.2022.03.012","DOI":"10.1016\/j.jpdc.2022.03.012"},{"issue":"4","key":"7169_CR28","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1109\/MCSE.2021.3083216","volume":"23","author":"L Dalcin","year":"2021","unstructured":"Dalcin L, Fang Y-LL (2021) mpi4py: Status update after 12 years of development. Comput. Sci. Eng. 23(4):47\u201354. https:\/\/doi.org\/10.1109\/MCSE.2021.3083216","journal-title":"Comput. Sci. Eng."},{"key":"7169_CR29","unstructured":"Chen T, Li M, Li Y, Lin M, Wang N, Wang M, Xiao T, Xu B, Zhang C, Zhang Z (2015) MXNet: a flexible and efficient machine learning library for heterogeneous distributed systems"},{"issue":"10","key":"7169_CR30","doi-asserted-by":"publisher","first-page":"1865","DOI":"10.1109\/JPROC.2017.2675998","volume":"105","author":"G Cheng","year":"2017","unstructured":"Cheng G, Han J, Lu X (2017) Remote sensing image scene classification: benchmark and state of the art. Proc IEEE 105(10):1865\u20131883. https:\/\/doi.org\/10.1109\/JPROC.2017.2675998","journal-title":"Proc IEEE"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07169-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-025-07169-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-025-07169-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,4,4]],"date-time":"2025-04-04T05:56:29Z","timestamp":1743746189000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-025-07169-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,4,1]]},"references-count":30,"journal-issue":{"issue":"5","published-online":{"date-parts":[[2025,4]]}},"alternative-id":["7169"],"URL":"https:\/\/doi.org\/10.1007\/s11227-025-07169-y","relation":{},"ISSN":["1573-0484"],"issn-type":[{"type":"electronic","value":"1573-0484"}],"subject":[],"published":{"date-parts":[[2025,4,1]]},"assertion":[{"value":"7 March 2025","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"1 April 2025","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}}],"article-number":"682"}}