{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,2,21]],"date-time":"2025-02-21T07:45:45Z","timestamp":1740123945646,"version":"3.37.3"},"reference-count":39,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2023,7,7]],"date-time":"2023-07-07T00:00:00Z","timestamp":1688688000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,7,7]],"date-time":"2023-07-07T00:00:00Z","timestamp":1688688000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Adv Comput Math"],"published-print":{"date-parts":[[2023,8]]},"DOI":"10.1007\/s10444-023-10054-y","type":"journal-article","created":{"date-parts":[[2023,7,7]],"date-time":"2023-07-07T07:04:17Z","timestamp":1688713457000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Deep learning theory of distribution regression with CNNs"],"prefix":"10.1007","volume":"49","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-2194-0101","authenticated-orcid":false,"given":"Zhan","family":"Yu","sequence":"first","affiliation":[]},{"given":"Ding-Xuan","family":"Zhou","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,7,7]]},"reference":[{"key":"10054_CR1","doi-asserted-by":"publisher","first-page":"930","DOI":"10.1109\/18.256500","volume":"39","author":"A Barron","year":"1993","unstructured":"Barron, A.: Universal approximation bounds for superpositions of a sigmoidal function. IEEE Trans. Inf. Theory 39, 930\u2013945 (1993)","journal-title":"IEEE Trans. Inf. Theory"},{"issue":"1","key":"10054_CR2","doi-asserted-by":"publisher","first-page":"52","DOI":"10.1016\/j.jco.2006.07.001","volume":"23","author":"F Bauer","year":"2007","unstructured":"Bauer, F., Pereverzev, S., Rosasco, L.: On regularization algorithms in learning theory. J. Complex. 23(1), 52\u201372 (2007)","journal-title":"J. Complex."},{"key":"10054_CR3","doi-asserted-by":"crossref","unstructured":"Stone, C.J.: Optimal global rates of convergence for nonparametric regression. Ann. Stat. 1040-1053 (1982)","DOI":"10.1214\/aos\/1176345969"},{"issue":"3","key":"10054_CR4","doi-asserted-by":"publisher","first-page":"799","DOI":"10.3150\/07-BEJ5102","volume":"13","author":"A Christmann","year":"2007","unstructured":"Christmann, A., Steinwart, I.: Consistency and robustness of kernel-based regression in convex risk minimization. Bernoulli 13(3), 799\u2013819 (2007)","journal-title":"Bernoulli"},{"issue":"208","key":"10054_CR5","doi-asserted-by":"publisher","first-page":"607","DOI":"10.1090\/S0025-5718-1994-1240656-2","volume":"63","author":"CK Chui","year":"1994","unstructured":"Chui, C.K., Li, X., Mhaskar, H.N.: Neural networks for localized approximation. Math. Comput. 63(208), 607\u2013623 (1994)","journal-title":"Math. Comput."},{"key":"10054_CR6","doi-asserted-by":"publisher","first-page":"737","DOI":"10.1142\/S0219530519400074","volume":"17","author":"CK Chui","year":"2019","unstructured":"Chui, C.K., Lin, S.B., Zhou, D.X.: Deep neural networks for rotation-invariance approximation and learning. Anal. Appl. 17, 737\u2013772 (2019)","journal-title":"Anal. Appl."},{"key":"10054_CR7","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1090\/S0273-0979-01-00923-5","volume":"39","author":"F Cucker","year":"2002","unstructured":"Cucker, F., Smale, S.: On the mathematical foundations of learning. Bulletin Amer. Math. Soc. 39, 1\u201349 (2002)","journal-title":"Bulletin Amer. Math. Soc."},{"key":"10054_CR8","doi-asserted-by":"publisher","DOI":"10.1017\/CBO9780511618796","volume-title":"Learning theory: an approximation theory viewpoint","author":"F Cucker","year":"2007","unstructured":"Cucker, F., Zhou, D.X.: Learning theory: an approximation theory viewpoint. Cambridge University Press, Cambridge (2007)"},{"key":"10054_CR9","doi-asserted-by":"crossref","unstructured":"DiBenedetto, E., Debenedetto, E.: Real analysis. Birkh\u00e4user, Boston (2002)","DOI":"10.1007\/978-1-4612-0117-5"},{"key":"10054_CR10","doi-asserted-by":"publisher","DOI":"10.1016\/j.jat.2020.105523","volume":"263","author":"SN Dong","year":"2021","unstructured":"Dong, S.N., Sun, W.C.: Distributed learning and distribution regression of coefficient regularization. J. Approx. Theory 263, 105523 (2021)","journal-title":"J. Approx. Theory"},{"key":"10054_CR11","doi-asserted-by":"crossref","unstructured":"Dong, S.N., Sun, W.C.: Learning rate of distribution regression with dependent samples. J. Complex. 101679, (2022)","DOI":"10.1016\/j.jco.2022.101679"},{"key":"10054_CR12","doi-asserted-by":"publisher","DOI":"10.1016\/j.jco.2019.101426","volume":"56","author":"ZY Fang","year":"2020","unstructured":"Fang, Z.Y., Guo, Z.C., Zhou, D.X.: Optimal learning rates for distribution regression. J. Complex. 56, 101426 (2020)","journal-title":"J. Complex."},{"issue":"30","key":"10054_CR13","first-page":"993","volume":"16","author":"YL Feng","year":"2015","unstructured":"Feng, Y.L., Huang, X.L., Shi, L., Yang, Y.N., Suykens, J.A.K.: Learning with the maximum correntropy criterion induced losses for regression. J. Mach. Learn. Res. 16(30), 993\u20131034 (2015)","journal-title":"J. Mach. Learn. Res."},{"issue":"3","key":"10054_CR14","doi-asserted-by":"publisher","first-page":"662","DOI":"10.1016\/j.acha.2017.11.005","volume":"47","author":"ZC Guo","year":"2019","unstructured":"Guo, Z.C., Shi, L.: Optimal rates for coefficient-based regularized regression. Appl. Comput. Harmon. Anal. 47(3), 662\u2013701 (2019)","journal-title":"Appl. Comput. Harmon. Anal."},{"issue":"3","key":"10054_CR15","doi-asserted-by":"publisher","first-page":"478","DOI":"10.1016\/j.acha.2017.06.001","volume":"46","author":"ZC Guo","year":"2019","unstructured":"Guo, Z.C., Lin, S.B., Shi, L.: Distributed learning with multi-penalty regularization. Appl. Comput. Harmon. Anal. 46(3), 478\u2013499 (2019)","journal-title":"Appl. Comput. Harmon. Anal."},{"key":"10054_CR16","doi-asserted-by":"publisher","first-page":"343","DOI":"10.1016\/j.neunet.2019.12.014","volume":"123","author":"S Hayakawa","year":"2020","unstructured":"Hayakawa, S., Suzuki, T.: On the minimax optimality and superiority of deep neural network learning over sparse parameter spaces. Neural Netw. 123, 343\u2013361 (2020)","journal-title":"Neural Netw."},{"key":"10054_CR17","doi-asserted-by":"publisher","first-page":"359","DOI":"10.1016\/0893-6080(89)90020-8","volume":"2","author":"K Hornik","year":"1989","unstructured":"Hornik, K., Stinchcombe, M., White, H.: Multilayer feedforward networks are universal approximators. Neural Netw. 2, 359\u2013366 (1989)","journal-title":"Neural Netw."},{"issue":"12","key":"10054_CR18","doi-asserted-by":"publisher","first-page":"7649","DOI":"10.1109\/TIT.2018.2874447","volume":"64","author":"JM Klusowski","year":"2018","unstructured":"Klusowski, J.M., Barron, A.R.: Approximation by combinations of ReLU and squared ReLU ridge functions with $$\\ell ^ 1$$ and $$\\ell ^ 0$$ controls. IEEE Trans. Inf. Theory 64(12), 7649\u20137656 (2018)","journal-title":"IEEE Trans. Inf. Theory"},{"key":"10054_CR19","doi-asserted-by":"publisher","first-page":"295","DOI":"10.1006\/jath.1993.1104","volume":"75","author":"YV Lin","year":"1993","unstructured":"Lin, Y.V., Pinkus, A.: Fundamentality of ridge functions. J. Approx. Theory 75, 295\u2013311 (1993)","journal-title":"J. Approx. Theory"},{"key":"10054_CR20","doi-asserted-by":"publisher","first-page":"68","DOI":"10.1006\/jath.1998.3304","volume":"99","author":"VE Maiorov","year":"1999","unstructured":"Maiorov, V.E.: On best approximation by ridge functions. J. Approx. Theory 99, 68\u201394 (1999)","journal-title":"J. Approx. Theory"},{"key":"10054_CR21","doi-asserted-by":"publisher","first-page":"778","DOI":"10.1016\/j.neunet.2021.09.027","volume":"144","author":"T Mao","year":"2021","unstructured":"Mao, T., Shi, Z.J., Zhou, D.X.: Theory of deep convolutional neural networks III: approximating radial functions. Neural Netw. 144, 778\u201379 (2021)","journal-title":"Neural Netw."},{"key":"10054_CR22","unstructured":"M\u00fccke, N.: Stochastic gradient descent meets distribution regression. In International Conference on Artificial Intelligence and Statistics (pp. 2143-2151). PMLR, (2021)"},{"key":"10054_CR23","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1007\/BF02070821","volume":"1","author":"HN Mhaskar","year":"1993","unstructured":"Mhaskar, H.N.: Approximation properties of a multilayered feedforward artificial neural network. Adv. Comput. Math. 1, 61\u201380 (1993)","journal-title":"Adv. Comput. Math."},{"key":"10054_CR24","doi-asserted-by":"crossref","unstructured":"Mhaskar, H.N.: Dimension independent bounds for general shallow networks. Neural Netw. 123, 142\u2013152 (2020)","DOI":"10.1016\/j.neunet.2019.11.006"},{"key":"10054_CR25","unstructured":"P\u00f3czos, B., Singh, A., Rinaldo, A., Wasserman, L.: Distribution-free distribution regression. In Artificial Intelligence and Statistics (pp. 507-515). PMLR, (2013)"},{"key":"10054_CR26","unstructured":"Steinwart, I., Christmann, A.: Support vector machines. Springer Science & Business Media, (2008)"},{"issue":"4","key":"10054_CR27","first-page":"1875","volume":"48","author":"J Schmidt-Hieber","year":"2020","unstructured":"Schmidt-Hieber, J.: Nonparametric regression using deep neural networks with ReLU activation function. Ann. Stat. 48(4), 1875\u20131897 (2020)","journal-title":"Ann. Stat."},{"issue":"3","key":"10054_CR28","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1090\/S0273-0979-04-01025-0","volume":"41","author":"S Smale","year":"2004","unstructured":"Smale, S., Zhou, D.X.: Shannon sampling and function reconstruction from point values. Bull. Am. Math. Soc. 41(3), 279\u2013305 (2004)","journal-title":"Bull. Am. Math. Soc."},{"issue":"2","key":"10054_CR29","doi-asserted-by":"publisher","first-page":"153","DOI":"10.1007\/s00365-006-0659-y","volume":"26","author":"S Smale","year":"2007","unstructured":"Smale, S., Zhou, D.X.: Learning theory estimates via integral operators and their approximations. Constr. Approx. 26(2), 153\u2013172 (2007)","journal-title":"Constr. Approx."},{"key":"10054_CR30","unstructured":"Szab\u00f3, Z., Gretton, A., P\u00f3czos, B., Sriperumbudur, B.: Two-stage sampled learning theory on distributions. In Artificial Intelligence and Statistics (pp. 948-957). PMLR, (2015)"},{"key":"10054_CR31","first-page":"5272","volume":"17","author":"Z Szab\u00f3","year":"2016","unstructured":"Szab\u00f3, Z., Sriperumbudur, B.K., P\u00f3czos, B., Gretton, A.: Learning theory for distribution regression. J. Mach. Learn. Res. 17, 5272\u2013311 (2016)","journal-title":"J. Mach. Learn. Res."},{"key":"10054_CR32","doi-asserted-by":"crossref","unstructured":"Villani, C.: Optimal transport: old and new (Vol. 338, p. 23). Berlin: Springer (2009)","DOI":"10.1007\/978-3-540-71050-9_28"},{"key":"10054_CR33","doi-asserted-by":"crossref","unstructured":"Yarotsky, D.: Error bounds for approximations with deep ReLU networks. Neural Networks 94, 103\u2013114 (2017)","DOI":"10.1016\/j.neunet.2017.07.002"},{"issue":"10","key":"10054_CR34","doi-asserted-by":"publisher","DOI":"10.1088\/1361-6420\/ac23c3","volume":"37","author":"Z Yu","year":"2021","unstructured":"Yu, Z., Ho, D.W.C., Shi, Z.J., Zhou, D.X.: Robust kernel-based distribution regression. Inverse Problems 37(10), 105014 (2021)","journal-title":"Inverse Problems"},{"key":"10054_CR35","doi-asserted-by":"crossref","unstructured":"Zhao, P., Zhou, Z.H.: Label distribution learning by optimal transport. In Proceedings of the AAAI Conference on Artificial Intelligence (Vol. 32, No. 1) (2018)","DOI":"10.1609\/aaai.v32i1.11609"},{"issue":"2","key":"10054_CR36","doi-asserted-by":"publisher","first-page":"787","DOI":"10.1016\/j.acha.2019.06.004","volume":"48","author":"DX Zhou","year":"2020","unstructured":"Zhou, D.X.: Universality of deep convolutional neural networks. Appl. Comput. Harmon. Anal. 48(2), 787\u2013794 (2020)","journal-title":"Appl. Comput. Harmon. Anal."},{"key":"10054_CR37","doi-asserted-by":"publisher","first-page":"895","DOI":"10.1142\/S0219530518500124","volume":"16","author":"DX Zhou","year":"2018","unstructured":"Zhou, D.X.: Deep distributed convolutional neural networks: universality. Anal. Appl. 16, 895\u2013919 (2018)","journal-title":"Anal. Appl."},{"key":"10054_CR38","doi-asserted-by":"crossref","unstructured":"Zhou, D.X.: Theory of deep convolutional neural networks: Downsampling. Neural Netw. 124, 319\u2013327 (2020)","DOI":"10.1016\/j.neunet.2020.01.018"},{"key":"10054_CR39","unstructured":"Zweig, A., Bruna, J.: A functional perspective on learning symmetric functions with neural networks. Int. Conf. Mach. Learn. (pp. 13023-13032). PMLR, (2021)"}],"container-title":["Advances in Computational Mathematics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10444-023-10054-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10444-023-10054-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10444-023-10054-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,8,22]],"date-time":"2023-08-22T07:10:19Z","timestamp":1692688219000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10444-023-10054-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,7,7]]},"references-count":39,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2023,8]]}},"alternative-id":["10054"],"URL":"https:\/\/doi.org\/10.1007\/s10444-023-10054-y","relation":{},"ISSN":["1019-7168","1572-9044"],"issn-type":[{"type":"print","value":"1019-7168"},{"type":"electronic","value":"1572-9044"}],"subject":[],"published":{"date-parts":[[2023,7,7]]},"assertion":[{"value":"21 August 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 June 2023","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 July 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest statement"}}],"article-number":"51"}}