{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:08:26Z","timestamp":1764266906533,"version":"3.46.0"},"reference-count":52,"publisher":"Springer Science and Business Media LLC","issue":"1","license":[{"start":{"date-parts":[[2023,12,7]],"date-time":"2023-12-07T00:00:00Z","timestamp":1701907200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,12,7]],"date-time":"2023-12-07T00:00:00Z","timestamp":1701907200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2024,1]]},"DOI":"10.1007\/s10994-023-06487-7","type":"journal-article","created":{"date-parts":[[2023,12,7]],"date-time":"2023-12-07T14:02:01Z","timestamp":1701957721000},"page":"281-323","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Continual variational dropout: a view of auxiliary local variables in continual learning"],"prefix":"10.1007","volume":"113","author":[{"given":"Nam Le","family":"Hai","sequence":"first","affiliation":[]},{"given":"Trang","family":"Nguyen","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0011-5137","authenticated-orcid":false,"given":"Linh Ngo","family":"Van","sequence":"additional","affiliation":[]},{"given":"Thien Huu","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Khoat","family":"Than","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,12,7]]},"reference":[{"key":"6487_CR1","unstructured":"Ahn, H., Cha, S., Lee, D., & Moon, T. (2019). Uncertainty-based continual learning with adaptive regularization. In Advances in Neural Information Processing Systems (pp. 4392\u20134402)."},{"key":"6487_CR2","doi-asserted-by":"crossref","unstructured":"Aljundi, R., Babiloni, F., Elhoseiny, M., Rohrbach, M., & Tuytelaars, T. (2018). Memory aware synapses: Learning what (not) to forget. In Proceedings of the European Conference on Computer Vision (ECCV) (pp. 139\u2013154).","DOI":"10.1007\/978-3-030-01219-9_9"},{"issue":"4","key":"6487_CR3","doi-asserted-by":"publisher","first-page":"3742","DOI":"10.1109\/TKDE.2021.3139469","volume":"35","author":"TX Bach","year":"2023","unstructured":"Bach, T. X., Anh, N. D., Linh, N. V., & Than, K. (2023). Dynamic transformation of prior knowledge into Bayesian models for data streams. IEEE Transactions on Knowledge and Data Engineering, 35(4), 3742\u20133750.","journal-title":"IEEE Transactions on Knowledge and Data Engineering"},{"key":"6487_CR4","unstructured":"Benzing, F. (2020). Understanding regularisation methods for continual learning. In Workshop of Advances in Neural Information Processing Systems."},{"key":"6487_CR5","unstructured":"Blundell, C., Cornebise, J., Kavukcuoglu, K., & Wierstra, D. (2015). Weight uncertainty in neural network. In International conference on machine learning (pp. 1613\u20131622). PMLR."},{"key":"6487_CR6","unstructured":"Boluki, S., Ardywibowo, R., Dadaneh, S. Z., Zhou, M., & Qian, X. (2020). Learnable Bernoulli dropout for bayesian deep learning. In The International Conference on Artificial Intelligence and Statistics, AISTATS (pp. 3905\u20133916)."},{"key":"6487_CR7","unstructured":"Cha, S., Hsu, H., Hwang, T., Calmon, F. P., & Moon, T. (2021). CPR: Classifier-projection regularization for continual learning. In 9th International Conference on Learning Representations, ICLR."},{"issue":"6","key":"6487_CR8","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1109\/MSP.2012.2211477","volume":"29","author":"L Deng","year":"2012","unstructured":"Deng, L. (2012). The MNIST database of handwritten digit images for machine learning research [best of the web]. IEEE Signal Processing Magazine, 29(6), 141\u2013142.","journal-title":"IEEE Signal Processing Magazine"},{"key":"6487_CR9","doi-asserted-by":"crossref","unstructured":"De\u00a0Lange, M., Aljundi, R., Masana, M., Parisot, S., Jia, X., Leonardis, A., Slabaugh, G., & Tuytelaars, T. (2021). A continual learning survey: Defying forgetting in classification tasks. IEEE Transactions on Pattern Analysis and Machine Intelligence","DOI":"10.1109\/TPAMI.2021.3057446"},{"key":"6487_CR10","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., Uszkoreit, J., &  Houlsby, N. (2020). An image is worth 16x16 words: Transformers for image recognition at scale. In International conference on learning representations"},{"key":"6487_CR11","unstructured":"Farquhar, S., & Gal, Y. (2018). A unifying bayesian view of continual learning. In The Bayesian deep learning workshop at neural information processing systems"},{"key":"6487_CR12","unstructured":"Gal, Y., Hron, J., & Kendall, A. (2017). Concrete dropout. In Advances in Neural Information Processing Systems (pp. 3581\u20133590)."},{"key":"6487_CR13","unstructured":"Ghahramani, Z., & Attias, H. (2000). Online variational Bayesian learning. In Slides from talk presented at NIPS workshop on online learning."},{"key":"6487_CR14","unstructured":"Goodfellow, I. J., Mirza, M., Xiao, D., Courville, A., & Bengio, Y. (2013). An empirical investigation of catastrophic forgetting in gradient-based neural networks. arXiv preprint arXiv:1312.6211"},{"key":"6487_CR15","unstructured":"Graves, A. (2011). Practical variational inference for neural networks. In Advances in Neural Information Processing Systems (pp. 2348\u20132356). Citeseer."},{"key":"6487_CR16","doi-asserted-by":"publisher","first-page":"85","DOI":"10.1016\/j.ijar.2019.05.010","volume":"112","author":"C Ha","year":"2019","unstructured":"Ha, C., Tran, V.-D., Van, L. N., & Than, K. (2019). Eliminating overfitting of probabilistic topic models on short and noisy text: The role of dropout. International Journal of Approximate Reasoning, 112, 85\u2013104.","journal-title":"International Journal of Approximate Reasoning"},{"key":"6487_CR17","doi-asserted-by":"crossref","unstructured":"Hendrycks, D., Basart, S., Mu, N., Kadavath, S., Wang, F., Dorundo, E., Desai, R., Zhu, T., Parajuli, S., Guo, M., Song, D., Steinhardt, J., & Gilmer, J. (2021). The many faces of robustness: A critical analysis of out-of-distribution generalization. In Proceedings of the IEEE\/CVF international conference on computer vision (pp. 8340\u20138349).","DOI":"10.1109\/ICCV48922.2021.00823"},{"key":"6487_CR18","unstructured":"Henning, C., Cervera, M., D\u2019Angelo, F., Von\u00a0Oswald, J., Traber, R., Ehret, B., Kobayashi, S., Grewe, B. F., & Sacramento, J. (2021). Posterior meta-replay for continual learning. In Advances in neural information processing systems (Vol. 34)."},{"key":"6487_CR19","unstructured":"Jung, S., Ahn, H., Cha, S., & Moon, T. (2020). Continual learning with node-importance based adaptive group sparse regularization. In Advances in neural information processing systems"},{"key":"6487_CR20","first-page":"2575","volume":"28","author":"DP Kingma","year":"2015","unstructured":"Kingma, D. P., Salimans, T., & Welling, M. (2015). Variational dropout and the local reparameterization trick. Advances in Neural Information Processing Systems, 28, 2575\u20132583.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6487_CR21","unstructured":"Kingma, D. P., & Welling, M. (2014). Auto-encoding variational bayes. In: Bengio, Y., LeCun, Y. (eds.) 2nd international conference on learning representations, ICLR."},{"issue":"13","key":"6487_CR22","doi-asserted-by":"publisher","first-page":"3521","DOI":"10.1073\/pnas.1611835114","volume":"114","author":"J Kirkpatrick","year":"2017","unstructured":"Kirkpatrick, J., Pascanu, R., Rabinowitz, N., Veness, J., Desjardins, G., Rusu, A. A., Milan, K., Quan, J., Ramalho, T., Grabska-Barwinska, A., et al. (2017). Overcoming catastrophic forgetting in neural networks. Proceedings of the National Academy of Sciences, 114(13), 3521\u20133526.","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"6487_CR23","unstructured":"Krizhevsky, A. (2009). Learning multiple layers of features from tiny images. Technical report, University of Toronto."},{"issue":"12","key":"6487_CR24","doi-asserted-by":"publisher","first-page":"2935","DOI":"10.1109\/TPAMI.2017.2773081","volume":"40","author":"Z Li","year":"2017","unstructured":"Li, Z., & Hoiem, D. (2017). Learning without forgetting. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40(12), 2935\u20132947.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"6487_CR25","doi-asserted-by":"publisher","first-page":"345","DOI":"10.1016\/j.neucom.2021.10.047","volume":"468","author":"N Van Linh","year":"2022","unstructured":"Van Linh, N., Bach, T. X., & Than, K. (2022). A graph convolutional topic model for short and noisy text streams. Neurocomputing, 468, 345\u2013359.","journal-title":"Neurocomputing"},{"key":"6487_CR26","doi-asserted-by":"crossref","unstructured":"Liu, Y., Dong, W., Zhang, L., Gong, D., & Shi, Q. (2019). Variational bayesian dropout with a hierarchical prior. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (pp. 7124\u20137133).","DOI":"10.1109\/CVPR.2019.00729"},{"key":"6487_CR27","unstructured":"Loo, N., Swaroop, S., & Turner, R. E. (2021). Generalized variational continual learning. In International conference on learning representation"},{"issue":"3","key":"6487_CR28","doi-asserted-by":"publisher","first-page":"448","DOI":"10.1162\/neco.1992.4.3.448","volume":"4","author":"DJC MacKay","year":"1992","unstructured":"MacKay, D. J. C. (1992). A practical Bayesian framework for backpropagation networks. Neural Computation, 4(3), 448\u2013472.","journal-title":"Neural Computation"},{"key":"6487_CR29","unstructured":"Mirzadeh, S., Farajtabar, M., Pascanu, R., & Ghasemzadeh, H. (2020). Understanding the role of training regimes in continual learning. In Advances in neural information processing systems"},{"key":"6487_CR30","doi-asserted-by":"crossref","unstructured":"Mirzadeh, S. I., Farajtabar, M., & Ghasemzadeh, H. (2020). Dropout as an implicit gating mechanism for continual learning. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops (pp. 232\u2013233).","DOI":"10.1109\/CVPRW50498.2020.00124"},{"key":"6487_CR31","unstructured":"Molchanov, D., Ashukha, A., & Vetrov, D. (2017). Variational dropout sparsifies deep neural networks. In International conference on machine learning (pp. 2498\u20132507)."},{"key":"6487_CR32","volume-title":"Machine learning: A probabilistic perspective","author":"KP Murphy","year":"2012","unstructured":"Murphy, K. P. (2012). Machine learning: A probabilistic perspective. Cambridge: MIT Press."},{"key":"6487_CR33","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4612-0745-0","volume-title":"Bayesian learning for neural networks","author":"RM Neal","year":"1996","unstructured":"Neal, R. M. (1996). Bayesian learning for neural networks. Berlin: Springer."},{"key":"6487_CR34","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1016\/j.neucom.2022.07.019","volume":"505","author":"T Nguyen","year":"2022","unstructured":"Nguyen, T., Mai, T., Nguyen, N., Van, L. N., & Than, K. (2022b). Balancing stability and plasticity when learning topic models from short and noisy text streams. Neurocomputing, 505, 30\u201343.","journal-title":"Neurocomputing"},{"key":"6487_CR35","first-page":"15188","volume":"34","author":"S Nguyen","year":"2021","unstructured":"Nguyen, S., Nguyen, D., Nguyen, K., Than, K., Bui, H., & Ho, N. (2021). Structured dropout variational inference for Bayesian neural networks. Advances in Neural Information Processing Systems, 34, 15188\u201315202.","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"8","key":"6487_CR36","doi-asserted-by":"publisher","first-page":"3025","DOI":"10.1007\/s10994-022-06169-w","volume":"111","author":"H Nguyen","year":"2022","unstructured":"Nguyen, H., Pham, H., Nguyen, S., Van Linh, N., & Than, K. (2022a). Adaptive infinite dropout for noisy and sparse data streams. Machine Learning, 111(8), 3025\u20133060.","journal-title":"Machine Learning"},{"key":"6487_CR37","unstructured":"Nguyen, C. V., Li, Y., Bui, T. D., & Turner, R. E. (2018). Variational continual learning. In International conference on learning representation."},{"key":"6487_CR38","doi-asserted-by":"crossref","unstructured":"Nguyen, V.-S., Nguyen, D.-T., Van, L.N., & Than, K. (2019). Infinite dropout for training bayesian models from data streams. In IEEE international conference on big data (Big Data) (pp. 125\u2013134). IEEE","DOI":"10.1109\/BigData47090.2019.9005544"},{"key":"6487_CR39","doi-asserted-by":"crossref","unstructured":"Oh, C., Adamczewski, K., & Park, M. (2020). Radial and directional posteriors for Bayesian deep learning. In The thirty-fourth conference on artificial intelligence, AAAI (pp. 5298\u20135305)","DOI":"10.1609\/aaai.v34i04.5976"},{"key":"6487_CR40","unstructured":"Paisley, J. W., Blei, D. M., & Jordan, M. I. (2012). Variational bayesian inference with stochastic search. In Proceedings of the 29th international conference on machine learning, ICML"},{"key":"6487_CR41","doi-asserted-by":"crossref","unstructured":"Phan, H., Tuan, A. P., Nguyen, S., Linh, N. V., & Than, K. (2022). Reducing catastrophic forgetting in neural networks via Gaussian mixture approximation. In Pacific-Asia Conference on Knowledge Discovery and Data Mining (pp. 106\u2013117). Springer: Berlin","DOI":"10.1007\/978-3-031-05933-9_9"},{"issue":"7","key":"6487_CR42","doi-asserted-by":"publisher","first-page":"1649","DOI":"10.1162\/089976601750265045","volume":"13","author":"M-A Sato","year":"2001","unstructured":"Sato, M.-A. (2001). Online model selection based on the variational bayes. Neural Computation, 13(7), 1649\u20131681.","journal-title":"Neural Computation"},{"key":"6487_CR43","first-page":"6747","volume":"34","author":"G Shi","year":"2021","unstructured":"Shi, G., Chen, J., Zhang, W., Zhan, L.-M., & Wu, X.-M. (2021). Overcoming catastrophic forgetting in incremental few-shot learning by finding flat minima. Advances in Neural Information Processing Systems, 34, 6747\u20136761.","journal-title":"Advances in Neural Information Processing Systems"},{"issue":"1","key":"6487_CR44","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava, N., Hinton, G., Krizhevsky, A., Sutskever, I., & Salakhutdinov, R. (2014). Dropout: A simple way to prevent neural networks from overfitting. The Journal of Machine Learning Research, 15(1), 1929\u20131958.","journal-title":"The Journal of Machine Learning Research"},{"key":"6487_CR45","unstructured":"Swaroop, S., Nguyen, C.V., Bui, T. D., & Turner, R. E. (2018). Improving and understanding variational continual learning. In NeurIPS Continual Learning Workshop."},{"key":"6487_CR46","unstructured":"Swiatkowski, J., Roth, K., Veeling, B., Tran, L., Dillon, J., Snoek, J., Mandt, S., Salimans, T., Jenatton, R., & Nowozin, S. (2020). The k-tied normal distribution: A compact parameterization of Gaussian mean field posteriors in Bayesian neural networks. In International conference on machine learning (pp. 9289\u20139299). PMLR."},{"key":"6487_CR47","doi-asserted-by":"crossref","unstructured":"Van, L.N., Hai, N.L., Pham, H., & Than, K. (2022). Auxiliary local variables for improving regularization\/prior approach in continual learning. In  Pacific-Asia conference on knowledge discovery and data mining (pp. 16\u201328). Springer: Berlin","DOI":"10.1007\/978-3-031-05933-9_2"},{"key":"6487_CR48","unstructured":"Van\u00a0de Ven, G. M., & Tolias, A. S. (2019). Three scenarios for continual learning. In NeurIPS\u2014Continual learning workshop"},{"key":"6487_CR49","unstructured":"Wah, C., Branson, S., Welinder, P., Perona, P., & Belongie, S. (2011). The Caltech-UCSD Birds-200-2011 dataset."},{"key":"6487_CR50","unstructured":"Wei, C., Kakade, S., & Ma, T. (2020). The implicit and explicit regularization effects of dropout. In International conference on machine learning (pp. 10181\u201310192). PMLR."},{"key":"6487_CR51","unstructured":"Yin, D., Farajtabar, M., & Li, A. (2020). Sola: Continual learning with second-order loss approximation. In Workshop of advances in neural information processing systems"},{"key":"6487_CR52","first-page":"3987","volume":"70","author":"F Zenke","year":"2017","unstructured":"Zenke, F., Poole, B., & Ganguli, S. (2017). Continual learning through synaptic intelligence. Proceedings of Machine Learning Research, 70, 3987.","journal-title":"Proceedings of Machine Learning Research"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-023-06487-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-023-06487-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-023-06487-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T18:03:54Z","timestamp":1764266634000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-023-06487-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,7]]},"references-count":52,"journal-issue":{"issue":"1","published-print":{"date-parts":[[2024,1]]}},"alternative-id":["6487"],"URL":"https:\/\/doi.org\/10.1007\/s10994-023-06487-7","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"type":"print","value":"0885-6125"},{"type":"electronic","value":"1573-0565"}],"subject":[],"published":{"date-parts":[[2023,12,7]]},"assertion":[{"value":"12 July 2022","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"19 September 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"4 November 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 December 2023","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that they have no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}},{"value":"The implementation for CVD can be found in\n                      \n                      .","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Code availability"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethics approval"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":6,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}