{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,9,11]],"date-time":"2025-09-11T19:47:43Z","timestamp":1757620063788,"version":"3.44.0"},"reference-count":44,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T00:00:00Z","timestamp":1753660800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T00:00:00Z","timestamp":1753660800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Stat Comput"],"published-print":{"date-parts":[[2025,10]]},"DOI":"10.1007\/s11222-025-10692-w","type":"journal-article","created":{"date-parts":[[2025,7,28]],"date-time":"2025-07-28T14:40:12Z","timestamp":1753713612000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Distributed Estimation and Algorithm for Distributed Outcome Dependent Subsampling in Generalized Linear Regression with Large-scale Data"],"prefix":"10.1007","volume":"35","author":[{"given":"Jie","family":"Yin","sequence":"first","affiliation":[]},{"given":"Jieli","family":"Ding","sequence":"additional","affiliation":[]},{"given":"Changming","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,7,28]]},"reference":[{"key":"10692_CR1","first-page":"749","volume":"31","author":"M Ai","year":"2021","unstructured":"Ai, M., Yu, J., Zhang, H., Wang, H.: Optimal subsampling algorithms for big data regressions. Stat. Sin. 31, 749\u2013772 (2021)","journal-title":"Stat. Sin."},{"key":"10692_CR2","first-page":"387","volume":"119","author":"M Assran","year":"2020","unstructured":"Assran, M., Rabbat, M.: On the convergence of nesterov\u2019s accelerated gradient method in stochastic settings. International Conference on Machine Learning 119, 387\u2013397 (2020)","journal-title":"International Conference on Machine Learning"},{"key":"10692_CR3","doi-asserted-by":"publisher","first-page":"1110","DOI":"10.1214\/aos\/1059655907","volume":"31","author":"N Breslow","year":"2003","unstructured":"Breslow, N., McNeney, B., Wellner, J.A.: Large sample theory for semiparametric regression models with two-phase, outcome dependent sampling. Ann. Stat. 31, 1110\u20131139 (2003)","journal-title":"Ann. Stat."},{"key":"10692_CR4","doi-asserted-by":"publisher","first-page":"4999","DOI":"10.1002\/sim.8346","volume":"38","author":"Y Cai","year":"2019","unstructured":"Cai, Y., Huang, J., Ning, J., Lee, M.T., Rosner, B., Chen, Y.: Two sample test for correlated data under outcome-dependent sampling with an application to self-reported weight loss data. Stat. Med. 38, 4999\u20135009 (2019)","journal-title":"Stat. Med."},{"key":"10692_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.jspi.2024.106186","volume":"233","author":"Y Chao","year":"2024","unstructured":"Chao, Y., Ma, X., Zhu, B.: Distributed optimal subsampling for quantile regression with massive data. Journal of Statistical Planning and Inference 233, 106186 (2024)","journal-title":"Journal of Statistical Planning and Inference"},{"key":"10692_CR6","first-page":"1655","volume":"24","author":"X Chen","year":"2014","unstructured":"Chen, X., Xie, M.: A split-and-conquer approach for analysis of extraordinarily large data. Stat. Sin. 24, 1655\u20131684 (2014)","journal-title":"Stat. Sin."},{"key":"10692_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.csda.2019.106892","volume":"144","author":"L Chen","year":"2020","unstructured":"Chen, L., Zhou, Y.: Quantile regression in big data: a divide and conquer based strategy. Comput. Stat. Data Anal. 144, 106892 (2020)","journal-title":"Comput. Stat. Data Anal."},{"key":"10692_CR8","doi-asserted-by":"publisher","first-page":"112","DOI":"10.1016\/j.jspi.2020.03.004","volume":"209","author":"Q Cheng","year":"2020","unstructured":"Cheng, Q., Wang, H., Yang, M.: Information-based optimal subdata selection for big data logistic regression. J. Statistical Planning and Inference 209, 112\u2013122 (2020)","journal-title":"J. Statistical Planning and Inference"},{"key":"10692_CR9","unstructured":"Dhillon, P. S., Lu, Y., Foster, D., Ungar, L.: New subsampling algorithms for fast least squares regression. International Conference on Neural Information Processing Systems, 360\u2013368 (2013)"},{"key":"10692_CR10","doi-asserted-by":"publisher","first-page":"57","DOI":"10.1007\/s10985-015-9355-7","volume":"23","author":"J Ding","year":"2017","unstructured":"Ding, J., Lu, T.-S., Cai, J., Zhou, H.: Recent progresses in outcome dependent sampling with failure time data. Lifetime Data Anal. 23, 57\u201382 (2017)","journal-title":"Lifetime Data Anal."},{"key":"10692_CR11","first-page":"1","volume":"40","author":"D Eddelbuettel","year":"2011","unstructured":"Eddelbuettel, D., Francois, R.: Rcpp: seamless R and C++ integration. Journal of Statistical Software, Articles 40, 1\u201318 (2011)","journal-title":"Journal of Statistical Software, Articles"},{"key":"10692_CR12","doi-asserted-by":"publisher","first-page":"1054","DOI":"10.1016\/j.csda.2013.02.005","volume":"71","author":"D Eddelbuettel","year":"2014","unstructured":"Eddelbuettel, D., Sanderson, C.: Rcpparmadillo: accelerating R with high-performance C++ linear algebra. Comput. Stat. Data Anal. 71, 1054\u20131063 (2014)","journal-title":"Comput. Stat. Data Anal."},{"key":"10692_CR13","doi-asserted-by":"publisher","DOI":"10.23915\/distill.00006","author":"G Goh","year":"2017","unstructured":"Goh, G.: Why momentum really works. Distill (2017). https:\/\/doi.org\/10.23915\/distill.00006","journal-title":"Distill"},{"key":"10692_CR14","doi-asserted-by":"publisher","first-page":"668","DOI":"10.1080\/01621459.2018.1429274","volume":"114","author":"MI Jordan","year":"2019","unstructured":"Jordan, M.I., Lee, J.D., Yang, Y.: Communication-efficient distributed statistical inference. J. Am. Stat. Assoc. 114, 668\u2013681 (2019)","journal-title":"J. Am. Stat. Assoc."},{"key":"10692_CR15","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4757-4182-7","volume-title":"Optimization","author":"K Lange","year":"2004","unstructured":"Lange, K.: Optimization. Springer, New York (2004)"},{"key":"10692_CR16","doi-asserted-by":"publisher","first-page":"325","DOI":"10.1080\/01621459.2012.746061","volume":"108","author":"F Liang","year":"2013","unstructured":"Liang, F., Cheng, Y., Song, Q., Park, J., Yang, P.: A resampling-based stochastic approximation method for analysis of large geostatistical data. J. Am. Stat. Assoc. 108, 325\u2013339 (2013)","journal-title":"J. Am. Stat. Assoc."},{"key":"10692_CR17","doi-asserted-by":"publisher","first-page":"73","DOI":"10.4310\/SII.2011.v4.n1.a8","volume":"4","author":"N Lin","year":"2011","unstructured":"Lin, N., Xi, R.: Aggregated estimating equation estimation. Statistics and its Interface 4, 73\u201383 (2011)","journal-title":"Statistics and its Interface"},{"key":"10692_CR18","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4899-3242-6","volume-title":"Generalized linear models","author":"P McCullagh","year":"1989","unstructured":"McCullagh, P., Nelder, J.A.: Generalized linear models, 2nd edn. Chapman and Hall, London (1989)","edition":"2"},{"key":"10692_CR19","unstructured":"Nesterov, Y.: A method of solving a convex programming problem with convergence rate O(1\/k2). Soviet Mathematics Doklady 27, 372\u2013376 (1983)"},{"key":"10692_CR20","doi-asserted-by":"publisher","first-page":"125","DOI":"10.1007\/s10107-012-0629-5","volume":"140","author":"Y Nesterov","year":"2013","unstructured":"Nesterov, Y.: Gradient methods for minimizing composite functions. Math. Program. 140, 125\u2013161 (2013)","journal-title":"Math. Program."},{"key":"10692_CR21","doi-asserted-by":"publisher","first-page":"1691","DOI":"10.1080\/07350015.2021.1961789","volume":"40","author":"R Pan","year":"2022","unstructured":"Pan, R., Ren, T., Guo, B., Li, F., Li, G., Wang, H.: A note on distributed quantile regression by pilot sampling and one-step updating. Journal of Business and Economic Statistics 40, 1691\u20131700 (2022)","journal-title":"Journal of Business and Economic Statistics"},{"key":"10692_CR22","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/0041-5553(64)90137-5","volume":"4","author":"BT Polyak","year":"1964","unstructured":"Polyak, B.T.: Some methods of speeding up the convergence of iteration methods. USSR Comput. Math. Math. Phys. 4, 1\u201317 (1964)","journal-title":"USSR Comput. Math. Math. Phys."},{"key":"10692_CR23","doi-asserted-by":"publisher","first-page":"145","DOI":"10.1016\/S0893-6080(98)00116-6","volume":"12","author":"N Qian","year":"1999","unstructured":"Qian, N.: On the momentum term in gradient descent learning algorithms. Neural Netw. 12, 145\u2013151 (1999)","journal-title":"Neural Netw."},{"key":"10692_CR24","doi-asserted-by":"publisher","first-page":"831","DOI":"10.1080\/01621459.2018.1448827","volume":"114","author":"M Quiroz","year":"2019","unstructured":"Quiroz, M., Kohn, R., Villani, M., Tran, M.-N.: Speeding up MCMC by efficient data subsampling. J. Am. Stat. Assoc. 114, 831\u2013843 (2019)","journal-title":"J. Am. Stat. Assoc."},{"key":"10692_CR25","unstructured":"Ruder, S.: An overview of gradient descent optimization algorithms. arXiv: 1609.04747v2 [cs.LG]15 (2017)"},{"key":"10692_CR26","doi-asserted-by":"publisher","first-page":"701","DOI":"10.1111\/biom.13423","volume":"78","author":"S Sauer","year":"2022","unstructured":"Sauer, S., Hedt-Gauthier, B., Rivera-Rodriguez, C., Haneuse, S.: Small-sample inference for cluster-based outcome-dependent sampling schemes in resource-limited settings: investigating low birthweight in Rwanda. Biometrics 78, 701\u2013715 (2022)","journal-title":"Biometrics"},{"key":"10692_CR27","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1093\/biomet\/asn073","volume":"96","author":"R Song","year":"2009","unstructured":"Song, R., Zhou, H., Kosorok, M.R.: A note on semiparametric efficient inference for two-stage outcome-dependent sampling with a continuous outcome. Biometrika 96, 221\u2013228 (2009)","journal-title":"Biometrika"},{"key":"10692_CR28","first-page":"1","volume":"17","author":"W Su","year":"2016","unstructured":"Su, W., Boyd, S., Candes, E.J.: A differential equation for modeling nesterov\u2019s accelerated gradient method: theory and insights. J. Mach. Learn. Res. 17, 1\u201343 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"10692_CR29","doi-asserted-by":"publisher","first-page":"663","DOI":"10.1093\/biostatistics\/kxw015","volume":"17","author":"Z Tan","year":"2016","unstructured":"Tan, Z., Qin, G., Zhou, H.: Estimation of a partially linear additive model for data from an outcome-dependent sampling design with a continuous outcome. Biostatistics 17, 663\u2013676 (2016)","journal-title":"Biostatistics"},{"key":"10692_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.csda.2021.107225","volume":"160","author":"K Wang","year":"2021","unstructured":"Wang, K., Li, S.: Robust distributed modal regression for massive data. Comput. Stat. Data Anal. 160, 107225 (2021)","journal-title":"Comput. Stat. Data Anal."},{"key":"10692_CR31","doi-asserted-by":"publisher","first-page":"73","DOI":"10.1007\/s11222-023-10247-x","volume":"33","author":"K Wang","year":"2023","unstructured":"Wang, K., Li, S.: Distributed statistical optimization for non-randomly stored big data with application to penalized learning. Stat. Comput. 33, 73 (2023)","journal-title":"Stat. Comput."},{"key":"10692_CR32","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1093\/biomet\/asaa043","volume":"108","author":"H Wang","year":"2021","unstructured":"Wang, H., Ma, Y.: Optimal subsampling for quantile regression in big data. Biometrika 108, 99\u2013112 (2021)","journal-title":"Biometrika"},{"key":"10692_CR33","doi-asserted-by":"publisher","first-page":"393","DOI":"10.1080\/01621459.2017.1408468","volume":"114","author":"H Wang","year":"2019","unstructured":"Wang, H., Yang, M., Stufken, J.: Information-based optimal subdata selection for big data linear regression. J. Am. Stat. Assoc. 114, 393\u2013405 (2019)","journal-title":"J. Am. Stat. Assoc."},{"key":"10692_CR34","doi-asserted-by":"publisher","DOI":"10.1016\/j.csda.2021.107265","volume":"162","author":"F Wang","year":"2021","unstructured":"Wang, F., Zhu, Y., Huang, D., Qi, H., Wang, H.: Distributed one-step upgraded estimation for non-uniformly and non-randomly distributed data. Comput. Stat. Data Anal. 162, 107265 (2021)","journal-title":"Comput. Stat. Data Anal."},{"key":"10692_CR35","unstructured":"Xu, G., Shang, Z., Cheng, G.: Optimal tuning for divide-and-conquer kernel ridge regression with massive data. Proceedings of the 35th International Conference on Machine Learning, 80, 5483\u20135491, PMLR (2018)"},{"key":"10692_CR36","doi-asserted-by":"publisher","first-page":"891","DOI":"10.1080\/10618600.2019.1586714","volume":"28","author":"G Xu","year":"2019","unstructured":"Xu, G., Shang, Z., Cheng, G.: Distributed generalized cross-validation for divide-and-conquer kernel ridge regression and its asymptotic optimality. J. Comput. Graph. Stat. 28, 891\u2013908 (2019)","journal-title":"J. Comput. Graph. Stat."},{"key":"10692_CR37","doi-asserted-by":"publisher","first-page":"1219","DOI":"10.1007\/s11425-016-0152-4","volume":"60","author":"S Yan","year":"2017","unstructured":"Yan, S., Ding, J., Liu, Y.: Statistical inference methods and applications of outcome-dependent sampling designs under generalized linear models. SCIENCE CHINA Math. 60, 1219\u20131238 (2017)","journal-title":"SCIENCE CHINA Math."},{"key":"10692_CR38","doi-asserted-by":"publisher","DOI":"10.1016\/j.jspi.2024.106253","volume":"237","author":"J Yin","year":"2025","unstructured":"Yin, J., Ding, J., Yang, C.: Outcome dependent subsampling divide and conquer in generalized linear models for massive data. Journal of Statistical Planning and Inference 237, 106253 (2025)","journal-title":"Journal of Statistical Planning and Inference"},{"key":"10692_CR39","doi-asserted-by":"publisher","first-page":"467","DOI":"10.1007\/s00362-022-01386-w","volume":"65","author":"J Yu","year":"2024","unstructured":"Yu, J., Ai, M., Ye, Z.: A review on design inspired subsampling for big data. Stat. Pap. 65, 467\u2013510 (2024)","journal-title":"Stat. Pap."},{"key":"10692_CR40","doi-asserted-by":"publisher","first-page":"265","DOI":"10.1080\/01621459.2020.1773832","volume":"117","author":"J Yu","year":"2022","unstructured":"Yu, J., Wang, H., Ai, M., Zhang, H.: Optimal distributed subsampling for maximum quasi-likelihood estimators with massive data. J. Am. Stat. Assoc. 117, 265\u2013276 (2022)","journal-title":"J. Am. Stat. Assoc."},{"key":"10692_CR41","first-page":"3299","volume":"16","author":"YC Zhang","year":"2015","unstructured":"Zhang, Y.C., Duchi, J.C., Wainwright, M.J.: Divide and conquer kernel ridge regression: a distributed algorithm with minimax optimal rates. J. Mach. Learn. Res. 16, 3299\u20133340 (2015)","journal-title":"J. Mach. Learn. Res."},{"key":"10692_CR42","doi-asserted-by":"crossref","unstructured":"Zhang, H., Wang, H.: Distributed subdata selection for big data via sampling-based approach. Comput. Stat. Data Anal. 153, 107072 (2021)","DOI":"10.1016\/j.csda.2020.107072"},{"key":"10692_CR43","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1111\/j.0006-341X.2002.00413.x","volume":"58","author":"H Zhou","year":"2002","unstructured":"Zhou, H., Weaver, M.A., Qin, J., Longnecker, M.P., Wang, M.C.: A semiparametric empirical likelihood method for data from an outcome-dependent sampling scheme with a continuous outcome. Biometrics 58, 413\u2013421 (2002)","journal-title":"Biometrics"},{"key":"10692_CR44","doi-asserted-by":"crossref","unstructured":"Zhou, P., Yu, Z., Ma, J., Tian, M., Fan, Y.: Communication-efficient distributed estimator for generalized linear models with a diverging number of covariates. Comput. Stat. Data Anal. 157, 107154 (2021)","DOI":"10.1016\/j.csda.2020.107154"}],"container-title":["Statistics and Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11222-025-10692-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11222-025-10692-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11222-025-10692-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,8]],"date-time":"2025-09-08T04:46:46Z","timestamp":1757306806000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11222-025-10692-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7,28]]},"references-count":44,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2025,10]]}},"alternative-id":["10692"],"URL":"https:\/\/doi.org\/10.1007\/s11222-025-10692-w","relation":{},"ISSN":["0960-3174","1573-1375"],"issn-type":[{"type":"print","value":"0960-3174"},{"type":"electronic","value":"1573-1375"}],"subject":[],"published":{"date-parts":[[2025,7,28]]},"assertion":[{"value":"31 December 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 July 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 July 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}],"article-number":"156"}}