{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:39:29Z","timestamp":1766219969655,"version":"3.48.0"},"publisher-location":"New York, NY, USA","reference-count":30,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation of China","award":["62172215"],"award-info":[{"award-number":["62172215"]}]},{"name":"A3 Foresight Program of NSFC","award":["62061146002"],"award-info":[{"award-number":["62061146002"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,9,8]]},"DOI":"10.1145\/3754598.3754659","type":"proceedings-article","created":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:34:32Z","timestamp":1766219672000},"page":"83-93","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["It Takes Two: Accelerating Accurate Federated Learning through Pipelined Intra-Batch Data Sampling and Training"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0006-1683-2233","authenticated-orcid":false,"given":"Chenghao","family":"Nu","sequence":"first","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6149-7570","authenticated-orcid":false,"given":"Zhe","family":"Zhang","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5678-4324","authenticated-orcid":false,"given":"Ye","family":"Li","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6314-0332","authenticated-orcid":false,"given":"Yanchao","family":"Zhao","sequence":"additional","affiliation":[{"name":"Nanjing University of Aeronautics and Astronautics, Nanjing, China"}]}],"member":"320","published-online":{"date-parts":[[2025,12,20]]},"reference":[{"key":"e_1_3_3_1_2_2","unstructured":"Guillaume Alain Alex Lamb Chinnadhurai Sankar Aaron Courville and Yoshua Bengio. 2015. Variance reduction in sgd by distributed importance sampling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1511.06481 (2015)."},{"key":"e_1_3_3_1_3_2","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D15-1075"},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.5555\/3692070.3692473"},{"key":"e_1_3_3_1_5_2","unstructured":"Aditya Devarakonda Maxim Naumov and Michael Garland. 2017. Adabatch: Adaptive batch sizes for training deep neural networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1712.02029 (2017)."},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"crossref","unstructured":"Chen Gong Zhenzhe Zheng Yunfeng Shao Bingshuai Li Fan Wu and Guihai Chen. 2024. ODE: An Online Data Selection Framework for Federated Learning With Limited Storage. IEEE\/ACM Transactions on Networking 32 4 (2024) 2794\u20132809.","DOI":"10.1109\/TNET.2024.3365534"},{"key":"e_1_3_3_1_7_2","first-page":"1143","volume-title":"Advances in Neural Information Processing Systems","author":"He Fengxiang","year":"2019","unstructured":"Fengxiang He, Tongliang Liu, and Dacheng Tao. 2019. Control Batch Size and Learning Rate to Generalize Well: Theoretical and Empirical Evidence. In Advances in Neural Information Processing Systems, Vol.\u00a032. 1143\u20131152."},{"key":"e_1_3_3_1_8_2","first-page":"18648","volume-title":"International Conference on Machine Learning","author":"Hong Feng","year":"2024","unstructured":"Feng Hong, Yueming Lyu, Jiangchao Yao, Ya Zhang, Ivor Tsang, and Yanfeng Wang. 2024. Diversified Batch Selection for Training Acceleration. In International Conference on Machine Learning. 18648\u201318667."},{"key":"e_1_3_3_1_9_2","first-page":"7265","volume-title":"Advances in Neural Information Processing Systems","author":"Johnson Tyler\u00a0B","year":"2018","unstructured":"Tyler\u00a0B Johnson and Carlos Guestrin. 2018. Training Deep Models Faster with Robust, Approximate Importance Sampling. In Advances in Neural Information Processing Systems, Vol.\u00a031. 7265\u20137275."},{"key":"e_1_3_3_1_10_2","volume-title":"5th International Conference on Learning Representations","author":"Keskar Nitish\u00a0Shirish","year":"2017","unstructured":"Nitish\u00a0Shirish Keskar, Jorge Nocedal, Ping Tak\u00a0Peter Tang, Dheevatsa Mudigere, and Mikhail Smelyanskiy. 2017. On large-batch training for deep learning: Generalization gap and sharp minima. In 5th International Conference on Learning Representations."},{"key":"e_1_3_3_1_11_2","doi-asserted-by":"publisher","DOI":"10.1145\/3698038.3698559"},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM42981.2021.9488723"},{"key":"e_1_3_3_1_13_2","first-page":"429","volume-title":"Proceedings of Machine Learning and Systems","volume":"2","author":"Li Tian","year":"2020","unstructured":"Tian Li, Anit\u00a0Kumar Sahu, Manzil Zaheer, Maziar Sanjabi, Ameet Talwalkar, and Virginia Smith. 2020. Federated Optimization in Heterogeneous Networks. In Proceedings of Machine Learning and Systems, Vol.\u00a02. 429\u2013450."},{"key":"e_1_3_3_1_14_2","first-page":"1273","volume-title":"Artificial intelligence and statistics","author":"McMahan Brendan","year":"2017","unstructured":"Brendan McMahan, Eider Moore, Daniel Ramage, Seth Hampson, and Blaise\u00a0Aguera y Arcas. 2017. Communication-efficient learning of deep networks from decentralized data. In Artificial intelligence and statistics. 1273\u20131282."},{"key":"e_1_3_3_1_15_2","doi-asserted-by":"crossref","unstructured":"Xupeng Miao Hailin Zhang Yining Shi Xiaonan Nie Zhi Yang Yangyu Tao Jie Jiang and Bin Cui. 2025. Efficient and scalable huge embedding model training via distributed cache management. The VLDB Journal 34 3 (2025) 1\u201320.","DOI":"10.1007\/s00778-025-00908-w"},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"crossref","unstructured":"Weiguang Pang Xiantong Luo Kailun Chen Dong Ji Lei Qiao and Wang Yi. 2023. Efficient CUDA stream management for multi-DNN real-time inference on embedded GPUs. Journal of Systems Architecture 139 (2023) 102888.","DOI":"10.1016\/j.sysarc.2023.102888"},{"key":"e_1_3_3_1_17_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00149"},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2023\/462"},{"key":"e_1_3_3_1_19_2","doi-asserted-by":"publisher","DOI":"10.1145\/3498361.3538917"},{"key":"e_1_3_3_1_20_2","first-page":"7611","volume-title":"Advances in Neural Information Processing Systems","author":"Wang Jianyu","year":"2020","unstructured":"Jianyu Wang, Qinghua Liu, Hao Liang, Gauri Joshi, and H.\u00a0Vincent Poor. 2020. Tackling the Objective Inconsistency Problem in Heterogeneous Federated Optimization. In Advances in Neural Information Processing Systems, Vol.\u00a033. 7611\u20137623."},{"key":"e_1_3_3_1_21_2","first-page":"131197","volume-title":"Advances in Neural Information Processing Systems","author":"Wang Jiachen\u00a0T.","year":"2024","unstructured":"Jiachen\u00a0T. Wang, Tong Wu, Dawn Song, Prateek Mittal, and Ruoxi Jia. 2024. GREATS: Online Selection of High-Quality Data for LLM Training in Every Iteration. In Advances in Neural Information Processing Systems, Vol.\u00a037. 131197\u2013131223."},{"key":"e_1_3_3_1_22_2","first-page":"52033","volume-title":"International Conference on Machine Learning","author":"Wang Jiachen\u00a0T","year":"2024","unstructured":"Jiachen\u00a0T Wang, Tianji Yang, James Zou, Yongchan Kwon, and Ruoxi Jia. 2024. Rethinking Data Shapley for Data Selection Tasks: Misleads and Merits. In International Conference on Machine Learning. 52033\u201352063."},{"key":"e_1_3_3_1_23_2","doi-asserted-by":"crossref","unstructured":"Zhiyuan Wang Hongli Xu Yang Xu Zhida Jiang Jianchun Liu and Suo Chen. 2023. Fast: enhancing federated learning through adaptive data sampling and local training. IEEE Transactions on Parallel and Distributed Systems 35 2 (2023) 221\u2013236.","DOI":"10.1109\/TPDS.2023.3334398"},{"key":"e_1_3_3_1_24_2","first-page":"59","volume-title":"2024 USENIX Annual Technical Conference","author":"Wu Hao","year":"2024","unstructured":"Hao Wu, Yue Yu, Junxiao Deng, Shadi Ibrahim, Song Wu, Hao Fan, Ziyue Cheng, and Hai Jin. 2024. StreamBox: A Lightweight GPU SandBox for Serverless Inference Workflow. In 2024 USENIX Annual Technical Conference. 59\u201373."},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Ou Wu and Rujing Yao. 2025. Data Optimization in Deep Learning: A Survey. IEEE Transactions on Knowledge and Data Engineering 37 5 (2025) 2356\u20132375.","DOI":"10.1109\/TKDE.2025.3530916"},{"key":"e_1_3_3_1_26_2","first-page":"54104","volume-title":"Proceedings of the 41st International Conference on Machine Learning","author":"Xia Mengzhou","year":"2024","unstructured":"Mengzhou Xia, Sadhika Malladi, Suchin Gururangan, Sanjeev Arora, and Danqi Chen. 2024. LESS: selecting influential data for targeted instruction tuning. In Proceedings of the 41st International Conference on Machine Learning. 54104\u201354132."},{"key":"e_1_3_3_1_27_2","first-page":"69798","volume-title":"Advances in Neural Information Processing Systems","author":"Xie Sang\u00a0Michael","year":"2023","unstructured":"Sang\u00a0Michael Xie, Hieu Pham, Xuanyi Dong, Nan Du, Hanxiao Liu, Yifeng Lu, Percy\u00a0S Liang, Quoc\u00a0V Le, Tengyu Ma, and Adams\u00a0Wei Yu. 2023. DoReMi: Optimizing Data Mixtures Speeds Up Language Model Pretraining. In Advances in Neural Information Processing Systems, Vol.\u00a036. 69798\u201369818."},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1145\/3580305.3599263"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1145\/3589334.3645341"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.1145\/3485730.3485930"},{"key":"e_1_3_3_1_31_2","unstructured":"Zhe Zhang Yanchao Zhao Chuyi Chen Kun Zhu and Dusit Niyato. 2025. Energy Efficient and Low Latency Federated Distillation over UAV-assisted Wireless Networks. IEEE Transactions on Wireless Communications (2025) 1\u201316."}],"event":{"name":"ICPP '25: 54th International Conference on Parallel Processing","location":"San Diego CA USA","acronym":"ICPP '25"},"container-title":["Proceedings of the 54th International Conference on Parallel Processing"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3754598.3754659","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,20]],"date-time":"2025-12-20T08:35:04Z","timestamp":1766219704000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3754598.3754659"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,9,8]]},"references-count":30,"alternative-id":["10.1145\/3754598.3754659","10.1145\/3754598"],"URL":"https:\/\/doi.org\/10.1145\/3754598.3754659","relation":{},"subject":[],"published":{"date-parts":[[2025,9,8]]},"assertion":[{"value":"2025-12-20","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}