{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T14:30:39Z","timestamp":1768573839633,"version":"3.49.0"},"reference-count":43,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s10994-024-06683-z","type":"journal-article","created":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T11:19:52Z","timestamp":1737026392000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["On the convergence analysis of over-parameterized variational autoencoders: a neural tangent kernel perspective"],"prefix":"10.1007","volume":"114","author":[{"given":"Li","family":"Wang","sequence":"first","affiliation":[]},{"given":"Wei","family":"Huang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,1,16]]},"reference":[{"key":"6683_CR1","unstructured":"Alemi, A., Poole, B., Fischer, I., Dillon, J., Saurous, R. A., Murphy, K. (2018). Fixing a broken ELBO. In International conference on machine learning (PMLR) (pp. 159\u2013168)."},{"key":"6683_CR2","unstructured":"Allen-Zhu, Z., Li, Y., & Song, Z. (2019). A convergence theory for deep learning via over-parameterization. In International conference on machine learning (PMLR) (pp. 
242\u2013252)."},{"key":"6683_CR3","unstructured":"Arora, S., Du, S., Hu, W., Li, Z., & Wang, R. (2019b). Fine-grained analysis of optimization and generalization for overparameterized two-layer neural networks. In International conference on machine learning (PMLR) (pp. 322\u2013332)."},{"key":"6683_CR4","unstructured":"Arora, S., Du, S. S., Hu, W., Li, Z., Salakhutdinov, R., & Wang, R. (2019a). On exact computation with an infinitely wide neural net. arXiv preprint arXiv:1904.11955"},{"key":"6683_CR5","doi-asserted-by":"crossref","unstructured":"Bowman, S. R., Vilnis, L., Vinyals, O., Dai, A. M., Jozefowicz, R., & Bengio, S. (2015). Generating sentences from a continuous space. arXiv preprint arXiv:1511.06349","DOI":"10.18653\/v1\/K16-1002"},{"key":"6683_CR6","first-page":"10836","volume":"32","author":"Y Cao","year":"2019","unstructured":"Cao, Y., & Gu, Q. (2019). Generalization bounds of stochastic gradient descent for wide and deep neural networks. Advances in Neural Information Processing Systems, 32, 10836\u201310846.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6683_CR7","unstructured":"Chen, R. T., Li, X., Grosse, R., & Duvenaud, D. (2018). Isolating sources of disentanglement in VAEs. In Proceedings of the 32nd international conference on neural information processing systems (Vol. 2615)."},{"key":"6683_CR8","unstructured":"Clerico, E., Deligiannidis, G., & Doucet, A. (2023). Wide stochastic networks: Gaussian limit and PAC-Bayesian training. In International conference on algorithmic learning theory (PMLR) (pp. 447\u2013470)."},{"key":"6683_CR9","unstructured":"Dai, B., & Wipf, D. (2019). Diagnosing and enhancing VAE models. arXiv preprint arXiv:1903.05789"},{"key":"6683_CR10","unstructured":"Dai, B., Wang, Z., & Wipf, D. (2020). The usual suspects? Reassessing blame for vae posterior collapse. In International conference on machine learning (PMLR) (pp. 
2313\u20132322)."},{"key":"6683_CR11","first-page":"7180","volume":"34","author":"B Dai","year":"2021","unstructured":"Dai, B., Wenliang, L., & Wipf, D. (2021). On the value of infinite gradients in variational autoencoder models. Advances in Neural Information Processing Systems, 34, 7180\u20137192.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6683_CR12","unstructured":"Du, S., Lee, J., Li, H., Wang, L., & Zhai, X. (2019a). Gradient descent finds global minima of deep neural networks. In International conference on machine learning (PMLR) (pp. 1675\u20131685)."},{"key":"6683_CR13","unstructured":"Du, S. S., Zhai, X., Poczos, B., & Singh, A. (2018). Gradient descent provably optimizes over-parameterized neural networks. arXiv preprint arXiv:1810.02054"},{"key":"6683_CR14","first-page":"5723","volume":"32","author":"SS Du","year":"2019","unstructured":"Du, S. S., Hou, K., Salakhutdinov, R. R., Poczos, B., Wang, R., & Xu, K. (2019). Graph neural tangent kernel: Fusing graph neural networks with graph kernels. Advances in Neural Information Processing Systems, 32, 5723\u20135733.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6683_CR15","unstructured":"Franceschi, J.-Y., De\u00a0B\u00e9zenac, E., Ayed, I., Chen, M., Lamprier, S., & Gallinari, P. (2022). A neural tangent kernel perspective of GANs. In International conference on machine learning (PMLR) (pp. 6660\u20136704)."},{"key":"6683_CR16","unstructured":"He, J., Spokoyny, D., Neubig, G., & Berg-Kirkpatrick, T. (2019). Lagging inference networks and posterior collapse in variational autoencoders. arXiv preprint arXiv:1901.05534"},{"key":"6683_CR17","unstructured":"Higgins, I., Matthey, L., Pal, A., Burgess, C., Glorot, X., Botvinick, M., Mohamed, S., & Lerchner, A. (2016). beta-VAE: Learning basic visual concepts with a constrained variational framework."},{"key":"6683_CR18","unstructured":"Hron, J., Bahri, Y., Sohl-Dickstein, J., & Novak, R. (2020). 
Infinite attention: NNGP and NTK for deep attention networks. In International conference on machine learning (PMLR) (pp. 4376\u20134386)."},{"key":"6683_CR19","doi-asserted-by":"crossref","unstructured":"Huang, W., Du, W., & Da\u00a0Xu, R. Y. (2020) On the neural tangent kernel of deep networks with orthogonal initialization. arXiv preprint arXiv:2004.05867","DOI":"10.24963\/ijcai.2021\/355"},{"key":"6683_CR20","unstructured":"Huang, W., Liu, C., Chen, Y., Da\u00a0Xu, R. Y., Zhang, M., & Weng, T.-W. (2023). Analyzing deep PAC-Bayesian learning with neural tangent kernel: Convergence, analytic generalization bound, and efficient hyperparameter selection. Transactions on Machine Learning Research."},{"key":"6683_CR21","unstructured":"Jacot, A., Gabriel, F., & Hongler, C. (2018). Neural tangent kernel: Convergence and generalization in neural networks. arXiv preprint arXiv:1806.07572"},{"key":"6683_CR22","unstructured":"Kim, H., & Mnih, A. (2018). Disentangling by factorising. In International conference on machine learning (PMLR) (pp. 2649\u20132658)."},{"key":"6683_CR23","unstructured":"Kingma, D. P., & Welling, M. (2013). Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114"},{"key":"6683_CR24","unstructured":"Koehler, F., Mehta, V., Risteski, A., & Zhou, C. (2021). Variational autoencoders in the presence of low-dimensional data: landscape and implicit bias. arXiv preprint arXiv:2112.06868"},{"key":"6683_CR25","unstructured":"Kumar, A., & Poole, B. (2020). On implicit regularization in beta-VAE. In International conference on machine learning (PMLR) (pp. 5480\u20135490)."},{"key":"6683_CR26","unstructured":"Kumar, A., Sattigeri, P., & Balakrishnan, A. (2017). Variational inference of disentangled latent concepts from unlabeled observations. arXiv preprint arXiv:1711.00848"},{"key":"6683_CR27","doi-asserted-by":"crossref","unstructured":"Lee, J., Xiao, L., Schoenholz, S., Bahri, Y., Novak, R., Sohl-Dickstein, J., & Pennington, J. (2019). 
Wide neural networks of any depth evolve as linear models under gradient descent. In Advances in neural information processing systems Vol. 32.","DOI":"10.1088\/1742-5468\/abc62b"},{"key":"6683_CR28","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1109\/LSP.2020.2965328","volume":"27","author":"K-L Lim","year":"2020","unstructured":"Lim, K.-L., Jiang, X., & Yi, C. (2020). Deep clustering with variational autoencoder. IEEE Signal Processing Letters, 27, 231\u2013235.","journal-title":"IEEE Signal Processing Letters"},{"key":"6683_CR29","doi-asserted-by":"crossref","unstructured":"Liu, Z., Luo, P., Wang, X. & Tang, X. (2015). Deep learning face attributes in the wild. In Proceedings of the IEEE international conference on computer vision (pp. 3730\u20133738).","DOI":"10.1109\/ICCV.2015.425"},{"key":"6683_CR30","first-page":"32","volume-title":"Don\u2019t blame the ELBO!","author":"J Lucas","year":"2019","unstructured":"Lucas, J., Tucker, G., Grosse, R. B., & Norouzi, M. (2019). Don\u2019t blame the ELBO! A linear VAE perspective on posterior collapse. In Advances in neural information processing systems (Vol. 32)."},{"key":"6683_CR31","unstructured":"Nakagawa, A., Kato, K., & Suzuki, T. (2021). Quantitative understanding of VAE as a non-linearly scaled isometric embedding. In International conference on machine learning (PMLR) (pp. 7916\u20137926)."},{"key":"6683_CR32","unstructured":"Ng, A., et\u00a0al. (2011). Sparse autoencoder. CS294A Lecture notes 72, 1\u201319"},{"issue":"7","key":"6683_CR33","doi-asserted-by":"publisher","first-page":"4669","DOI":"10.1109\/TIT.2021.3065212","volume":"67","author":"TV Nguyen","year":"2021","unstructured":"Nguyen, T. V., Wong, R. K., & Hegde, C. (2021). Benefits of jointly training autoencoders: An improved neural tangent kernel analysis. 
IEEE Transactions on Information Theory, 67(7), 4669\u20134692.","journal-title":"IEEE Transactions on Information Theory"},{"key":"6683_CR34","unstructured":"Radford, A., Metz, L., & Chintala, S. (2015). Unsupervised representation learning with deep convolutional generative adversarial networks. arXiv preprint arXiv:1511.06434"},{"key":"6683_CR35","unstructured":"Reed, S. E., Zhang, Y., Zhang, Y., & Lee, H. (2015). Deep visual analogy-making. In Advances in neural information processing systems (Vol. 28)."},{"key":"6683_CR36","doi-asserted-by":"crossref","unstructured":"Rolinek, M., Zietlow, D., & Martius, G. (2019). Variational autoencoders pursue PCA directions (by accident). In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 12406\u201312415).","DOI":"10.1109\/CVPR.2019.01269"},{"key":"6683_CR37","doi-asserted-by":"publisher","first-page":"144618","DOI":"10.1109\/ACCESS.2019.2944630","volume":"7","author":"T Song","year":"2019","unstructured":"Song, T., Sun, J., Chen, B., Peng, W., & Song, J. (2019). Latent space expanded variational autoencoder for sentence generation. IEEE Access, 7, 144618\u2013144627.","journal-title":"IEEE Access"},{"key":"6683_CR38","unstructured":"Tschannen, M., Bachem, O. & Lucic, M. (2018) Recent advances in autoencoder-based representation learning. arXiv preprint arXiv:1812.05069"},{"key":"6683_CR39","unstructured":"Van Den\u00a0Oord, A., & Vinyals, O. (2017). Neural discrete representation learning. In Advances in neural information processing systems (pp. 6306\u20136315)."},{"key":"6683_CR40","unstructured":"Wipf, D.: Marginalization is not marginal: No bad VAE local minima when learning optimal sparse representations."},{"key":"6683_CR41","unstructured":"Yang, G. (2019). Scaling limits of wide neural networks with weight sharing: Gaussian process behavior, gradient independence, and neural tangent kernel derivation. 
arXiv preprint arXiv:1902.04760"},{"key":"6683_CR42","unstructured":"Ziyin, L., Zhang, H., Meng, X., Lu, Y., Xing, E., & Ueda, M. (2022). Stochastic neural networks with infinite width are deterministic. arXiv preprint arXiv:2201.12724"},{"issue":"3","key":"6683_CR43","doi-asserted-by":"publisher","first-page":"467","DOI":"10.1007\/s10994-019-05839-6","volume":"109","author":"D Zou","year":"2020","unstructured":"Zou, D., Cao, Y., Zhou, D., & Gu, Q. (2020). Gradient descent optimizes over-parameterized deep RELU networks. Machine Learning, 109(3), 467\u2013492.","journal-title":"Machine Learning"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06683-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-024-06683-z","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-024-06683-z.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,16]],"date-time":"2026-01-16T01:03:15Z","timestamp":1768525395000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-024-06683-z"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,1]]},"references-count":43,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["6683"],"URL":"https:\/\/doi.org\/10.1007\/s10994-024-06683-z","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,1]]},"assertion":[{"value":"20 May 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article 
History"}},{"value":"8 August 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 December 2024","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 January 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}],"article-number":"15"}}