{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T02:06:25Z","timestamp":1772762785681,"version":"3.50.1"},"reference-count":29,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2025,3,6]],"date-time":"2025-03-06T00:00:00Z","timestamp":1741219200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,6]],"date-time":"2025-03-06T00:00:00Z","timestamp":1741219200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012632","name":"Nazarbayev University","doi-asserted-by":"publisher","award":["20122022FD4131"],"award-info":[{"award-number":["20122022FD4131"]}],"id":[{"id":"10.13039\/501100012632","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2025,4]]},"DOI":"10.1007\/s10994-025-06747-8","type":"journal-article","created":{"date-parts":[[2025,3,6]],"date-time":"2025-03-06T13:10:18Z","timestamp":1741266618000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Gradient descent fails to learn high-frequency functions and modular arithmetic"],"prefix":"10.1007","volume":"114","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7405-8254","authenticated-orcid":false,"given":"Rustem","family":"Takhanov","sequence":"first","affiliation":[]},{"given":"Maxat","family":"Tezekbayev","sequence":"additional","affiliation":[]},{"given":"Artur","family":"Pak","sequence":"additional","affiliation":[]},{"given":"Arman","family":"Bolatov","sequence":"additional","affiliation":[]},{"given":"Zhenisbek","family":"Assylbekov","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,6]]},"reference":[{"key":"6747_CR1","doi-asserted-by":"publisher","first-page":"146","DOI":"10.1016\/j.jctb.2016.03.004","volume":"121","author":"N Alon","year":"2016","unstructured":"Alon, N., & Shikhelman, C. (2016). Many t copies in h-free graphs. Journal of Combinatorial Theory, Series B, 121, 146\u2013172. https:\/\/doi.org\/10.1016\/j.jctb.2016.03.004. Fifty Years of the Journal of Combinatorial Theory.","journal-title":"Journal of Combinatorial Theory, Series B"},{"key":"6747_CR2","doi-asserted-by":"publisher","unstructured":"Assylbekov, Z., & Takhanov, R. (2018). Reusing weights in subword-aware neural language models. In M. Walker, H. Ji, & A. Stent (Eds.), Proceedings of the 2018 conference of the North American chapter of the association for computational linguistics: Human language technologies (long papers) (Vol. 1, pp. 1413\u20131423). New Orleans: Association for Computational Linguistics. https:\/\/doi.org\/10.18653\/v1\/N18-1128. https:\/\/aclanthology.org\/N18-1128","DOI":"10.18653\/v1\/N18-1128"},{"issue":"2","key":"6747_CR3","doi-asserted-by":"crossref","first-page":"234","DOI":"10.1007\/s000390050054","volume":"8","author":"F Barthe","year":"1998","unstructured":"Barthe, F. (1998). Optimal young\u2019s inequality and its converse: A simple proof. Geometric and Functional Analysis GAFA, 8(2), 234\u2013242.","journal-title":"Geometric and Functional Analysis GAFA"},{"issue":"8","key":"6747_CR4","doi-asserted-by":"crossref","first-page":"517","DOI":"10.1090\/S0002-9904-1944-08180-9","volume":"50","author":"R Bellman","year":"1944","unstructured":"Bellman, R. (1944). Almost orthogonal series. Bulletin of the American Mathematical Society, 50(8), 517\u2013519.","journal-title":"Bulletin of the American Mathematical Society"},{"key":"6747_CR5","doi-asserted-by":"publisher","unstructured":"Blum, A., Furst, M. L., Jackson, J. C., Kearns, M. J., Mansour, Y., & Rudich, S. (1994). Weakly learning DNF and characterizing statistical query learning using Fourier analysis. In F. T. Leighton & M. T. Goodrich (Eds.), Proceedings of the twenty-sixth annual ACM symposium on theory of computing, 23\u201325 May 1994, Montr\u00e9al, Qu\u00e9bec, Canada (pp. 253\u2013262). ACM. https:\/\/doi.org\/10.1145\/195058.195147","DOI":"10.1145\/195058.195147"},{"issue":"2","key":"6747_CR6","doi-asserted-by":"crossref","first-page":"361","DOI":"10.2307\/2371530","volume":"63","author":"R Boas","year":"1941","unstructured":"Boas, R. (1941). A general moment problem. American Journal of Mathematics, 63(2), 361\u2013370.","journal-title":"American Journal of Mathematics"},{"key":"6747_CR7","doi-asserted-by":"publisher","unstructured":"Feldman, V., Guzm\u00e1n, C., & Vempala, S. S. (2017). Statistical query algorithms for mean vector estimation and stochastic convex optimization. In P. N. Klein (Ed.), Proceedings of the twenty-eighth annual ACM-SIAM symposium on discrete algorithms, SODA 2017, Barcelona, Spain, Hotel Porta Fira, January 16\u201319 (pp. 1265\u20131277). SIAM. https:\/\/doi.org\/10.1137\/1.9781611974782.82","DOI":"10.1137\/1.9781611974782.82"},{"key":"6747_CR8","unstructured":"Gromov, A. (2023). Grokking modular arithmetic."},{"key":"6747_CR9","unstructured":"Hardt, M., & Ma, T. (2017). Identity matters in deep learning. In International conference on learning representations. https:\/\/openreview.net\/forum?id=ryxB0Rtxx"},{"key":"6747_CR10","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2015). Deep residual learning for image recognition.","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"6747_CR11","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., & Schmidhuber, J. (1997). Long short-term memory. Neural Computation, 9(8), 1735\u20131780. https:\/\/doi.org\/10.1162\/neco.1997.9.8.1735","journal-title":"Neural Computation"},{"key":"6747_CR12","doi-asserted-by":"publisher","unstructured":"Kearns, M. J. (1993). Efficient noise-tolerant learning from statistical queries. In S. R. Kosaraju, D. S. Johnson, & A. Aggarwal (Eds.), Proceedings of the twenty-fifth annual ACM symposium on theory of computing, May 16\u201318, 1993, San Diego, CA, USA (pp. 392\u2013401). ACM. https:\/\/doi.org\/10.1145\/167088.167200","DOI":"10.1145\/167088.167200"},{"issue":"2\u20133","key":"6747_CR13","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1007\/s10994-007-5010-1","volume":"69","author":"AR Klivans","year":"2007","unstructured":"Klivans, A. R., & Sherstov, A. A. (2007). Unconditional lower bounds for learning intersections of halfspaces. Machine Learning, 69(2\u20133), 97\u2013114. https:\/\/doi.org\/10.1007\/s10994-007-5010-1","journal-title":"Machine Learning"},{"issue":"1","key":"6747_CR14","doi-asserted-by":"publisher","first-page":"2","DOI":"10.1016\/j.jcss.2008.07.008","volume":"75","author":"AR Klivans","year":"2009","unstructured":"Klivans, A. R., & Sherstov, A. A. (2009). Cryptographic hardness for learning intersections of halfspaces. Journal of Computer and System Sciences, 75(1), 2\u201312. https:\/\/doi.org\/10.1016\/j.jcss.2008.07.008. Learning Theory 2006.","journal-title":"Journal of Computer and System Sciences"},{"key":"6747_CR15","series-title":"Dover Books on Mathematics","volume-title":"Uniform distribution of sequences","author":"L Kuipers","year":"2012","unstructured":"Kuipers, L., & Niederreiter, H. (2012). Uniform distribution of sequences. Dover Books on MathematicsMineola: Dover Publications."},{"key":"6747_CR16","unstructured":"Liu, Z., Michaud, E. J., & Tegmark, M. (2023). Omnigrok: Grokking beyond algorithmic data. In The eleventh international conference on learning representations. https:\/\/openreview.net\/forum?id=zDiHoIWa0q1"},{"key":"6747_CR17","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevLett.129.270501","volume":"129","author":"Z Liu","year":"2022","unstructured":"Liu, Z., Yu, L.-W., Duan, L.-M., & Deng, D.-L. (2022). Presence and absence of barren plateaus in tensor-network based machine learning. Physical Review Letters, 129, 270501. https:\/\/doi.org\/10.1103\/PhysRevLett.129.270501","journal-title":"Physical Review Letters"},{"key":"6747_CR18","unstructured":"Livni, R., Shalev-Shwartz, S., & Shamir, O. (2014). On the computational efficiency of training neural networks. In Z. Ghahramani, M. Welling, C. Cortes, N. Lawrence, & K. Q. Weinberger (Eds.), Advances in neural information processing systems.  (Vol. 27). Curran Associates Inc. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2014\/file\/3a0772443a0739141292a5429b952fe6-Paper.pdf"},{"issue":"1","key":"6747_CR19","doi-asserted-by":"publisher","first-page":"4812","DOI":"10.1038\/s41467-018-07090-4","volume":"9","author":"JR McClean","year":"2018","unstructured":"McClean, J. R., Boixo, S., Smelyanskiy, V. N., Babbush, R., & Neven, H. (2018). Barren plateaus in quantum neural network training landscapes. Nature Communications, 9(1), 4812. https:\/\/doi.org\/10.1038\/s41467-018-07090-4","journal-title":"Nature Communications"},{"key":"6747_CR20","unstructured":"Power, A., Burda, Y., Edwards, H., Babuschkin, I., & Misra, V. (2022). Grokking: Generalization beyond overfitting on small algorithmic datasets."},{"key":"6747_CR21","unstructured":"Shalev-Shwartz, S., Shamir, O., & Shammah, S. (2017). Failures of gradient-based deep learning. In D. Precup & Y. W. Teh (Eds.), Proceedings of the 34th international conference on machine learning, ICML 2017, Sydney, NSW, Australia, 6\u201311 August 2017. Proceedings of machine learning research (Vol. 70, pp. 3067\u20133075). PMLR. http:\/\/proceedings.mlr.press\/v70\/shalev-shwartz17a.html"},{"key":"6747_CR22","first-page":"32","volume":"19","author":"O Shamir","year":"2018","unstructured":"Shamir, O. (2018). Distribution-specific hardness of learning neural networks. Journal of Machine Learning Research, 19, 32\u201313229.","journal-title":"Journal of Machine Learning Research"},{"key":"6747_CR23","series-title":"Mathematical notes; 18","volume-title":"Introduction to ergodic theory","author":"YG Sinai","year":"1977","unstructured":"Sinai, Y. G. (1977). Introduction to ergodic theory. Mathematical notes; 18Princeton: Princeton University Press."},{"key":"6747_CR24","doi-asserted-by":"publisher","DOI":"10.1016\/j.dsp.2022.103819","volume":"133","author":"R Takhanov","year":"2023","unstructured":"Takhanov, R. (2023). Reducing the dimensionality of data using tempered distributions. Digital Signal Processing, 133, 103819. https:\/\/doi.org\/10.1016\/j.dsp.2022.103819","journal-title":"Digital Signal Processing"},{"key":"6747_CR25","unstructured":"Takhanov, R. (2024). Multi-layer random features and the approximation power of neural networks. In R. J. Evans & I. Shpitser (Eds.), Proceedings of the Fortieth conference on uncertainty in artificial intelligence. Proceedings of machine learning research (Vol. 248, pp. 33\u201344). PMLR."},{"key":"6747_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2023.109777","volume":"143","author":"R Takhanov","year":"2023","unstructured":"Takhanov, R., Abylkairov, Y. S., & Tezekbayev, M. (2023). Autoencoders for a manifold learning problem with a Jacobian rank constraint. Pattern Recognition, 143, 109777. https:\/\/doi.org\/10.1016\/j.patcog.2023.109777","journal-title":"Pattern Recognition"},{"key":"6747_CR27","unstructured":"Takhanov, R., Tezekbayev, M., Pak, A., Bolatov, A., Kadyrsizova, Z., & Assylbekov, Z. (2024). Intractability of learning the discrete logarithm with gradient-based methods. In B. Yan\u0131ko\u011flu & W. Buntine (Eds.), Proceedings of the 15th Asian conference on machine learning. Proceedings of machine learning research (Vol. 222, pp. 1321\u20131336). PMLR. https:\/\/proceedings.mlr.press\/v222\/takhanov24a.html"},{"key":"6747_CR28","unstructured":"Wenger, E., Chen, M., Charton, F., & Lauter, K. (2022). SALSA: Attacking lattice cryptography with transformers. Cryptology. ePrint Archive, Paper 2022\/935. https:\/\/eprint.iacr.org\/2022\/935"},{"key":"6747_CR29","doi-asserted-by":"crossref","unstructured":"Yang, K. (2001). On learning correlated Boolean functions using statistical query. In Proceedings of the algorithmic learning theory, 12th international conference, ALT 2001, Washington, DC, USA (pp. 59\u201376).","DOI":"10.1007\/3-540-45583-3_7"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-025-06747-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-025-06747-8","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-025-06747-8.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,6]],"date-time":"2026-03-06T01:01:41Z","timestamp":1772758901000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-025-06747-8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,6]]},"references-count":29,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2025,4]]}},"alternative-id":["6747"],"URL":"https:\/\/doi.org\/10.1007\/s10994-025-06747-8","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,6]]},"assertion":[{"value":"8 August 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 October 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 January 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"6 March 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}],"article-number":"117"}}