{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,11]],"date-time":"2026-05-11T11:27:18Z","timestamp":1778498838158,"version":"3.51.4"},"reference-count":26,"publisher":"Springer Science and Business Media LLC","issue":"12","license":[{"start":{"date-parts":[[2022,11,7]],"date-time":"2022-11-07T00:00:00Z","timestamp":1667779200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2022,11,7]],"date-time":"2022-11-07T00:00:00Z","timestamp":1667779200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100004663","name":"Ministry of Science and Technology, Taiwan","doi-asserted-by":"publisher","award":["110-2634-F-002-050"],"award-info":[{"award-number":["110-2634-F-002-050"]}],"id":[{"id":"10.13039\/501100004663","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Mach Learn"],"published-print":{"date-parts":[[2022,12]]},"DOI":"10.1007\/s10994-022-06272-y","type":"journal-article","created":{"date-parts":[[2022,11,8]],"date-time":"2022-11-08T00:03:41Z","timestamp":1667865821000},"page":"4329-4357","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Bayesian mixture variational autoencoders for multi-modal learning"],"prefix":"10.1007","volume":"111","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8517-1698","authenticated-orcid":false,"given":"Keng-Te","family":"Liao","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bo-Wei","family":"Huang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chih-Chun","family":"Yang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shou-De","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2022,11,7]]},"reference":[{"key":"6272_CR1","doi-asserted-by":"crossref","unstructured":"Bagher\u00a0Zadeh, A., Liang, P. P., Poria, S., Cambria, E., & Morency, L. P. (2018). Multimodal language analysis in the wild: CMU-MOSEI dataset and interpretable dynamic fusion graph. In Proceedings of the 56th annual meeting of the association for computational linguistics (Volume 1: Long Papers), association for computational linguistics (pp. 2236\u20132246).","DOI":"10.18653\/v1\/P18-1208"},{"key":"6272_CR2","doi-asserted-by":"publisher","first-page":"135","DOI":"10.1162\/tacl_a_00051","volume":"5","author":"P Bojanowski","year":"2017","unstructured":"Bojanowski, P., Grave, E., Joulin, A., & Mikolov, T. (2017). Enriching word vectors with subword information. Transactions of the Association for Computational Linguistics, 5, 135\u2013146.","journal-title":"Transactions of the Association for Computational Linguistics"},{"key":"6272_CR3","unstructured":"Burda, Y., Grosse, R. B., & Salakhutdinov, R. (2016). Importance weighted autoencoders. In 4th international conference on learning representations, ICLR 2016, San Juan, Puerto Rico, May 2-4, 2016, conference track proceedings."},{"key":"6272_CR4","doi-asserted-by":"crossref","unstructured":"Castro, S., Hazarika, D., P\u00e9rez-Rosas, V., Zimmermann, R., Mihalcea, R., & Poria, S. (2019). Towards multimodal sarcasm detection (an _obviously_ perfect paper). CoRR, arXiv:1906.01815.","DOI":"10.18653\/v1\/P19-1455"},{"key":"6272_CR5","doi-asserted-by":"crossref","unstructured":"Daunhawer, I., Sutter, T. M., Marcinkevi\u010ds, R., & Vogt, J. (2020). Self-supervised disentanglement of modality-specific and shared factors improves multimodal generative models. In GCPR.","DOI":"10.1007\/978-3-030-71278-5_33"},{"key":"6272_CR6","first-page":"441","volume":"31","author":"M Figurnov","year":"2018","unstructured":"Figurnov, M., Mohamed, S., & Mnih, A. (2018). Implicit reparameterization gradients. Advances in Neural Information Processing Systems, 31, 441\u2013452.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6272_CR7","unstructured":"Ghosh, P., Sajjadi, M. S. M., Vergari, A., Black, M. J., Sch\u00f6lkopf, B. (2020). From variational to deterministic autoencoders. In 8th international conference on learning representations, ICLR 2020, Addis Ababa, Ethiopia, April 26-30, 2020."},{"key":"6272_CR8","doi-asserted-by":"publisher","unstructured":"Hasan, M. K., Rahman, W., Bagher\u00a0Zadeh, A., Zhong, J., Tanveer, M. I., Morency, L. P., & Hoque, M. E. (2019). UR-FUNNY: A multimodal language dataset for understanding humor. In Proceedings of the 2019 conference on empirical methods in natural language processing and the 9th international joint conference on natural language processing (EMNLP-IJCNLP). Association for Computational Linguistics, Hong Kong, China (pp. 2046\u20132056). https:\/\/doi.org\/10.18653\/v1\/D19-1211, https:\/\/www.aclweb.org\/anthology\/D19-1211","DOI":"10.18653\/v1\/D19-1211"},{"key":"6272_CR9","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In 2016 IEEE conference on computer vision and pattern recognition, CVPR 2016, Las Vegas, NV, USA, June 27-30, 2016 (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"issue":"8","key":"6272_CR10","doi-asserted-by":"publisher","first-page":"1771","DOI":"10.1162\/089976602760128018","volume":"14","author":"GE Hinton","year":"2002","unstructured":"Hinton, G. E. (2002). Training products of experts by minimizing contrastive divergence. Neural Computation, 14(8), 1771\u20131800.","journal-title":"Neural Computation"},{"issue":"4","key":"6272_CR11","doi-asserted-by":"crossref","first-page":"382","DOI":"10.1214\/ss\/1009212519","volume":"14","author":"JA Hoeting","year":"1999","unstructured":"Hoeting, J. A., Madigan, D., Raftery, A. E., & Volinsky, C. T. (1999). Bayesian model averaging: A tutorial. Statistical Science, 14(4), 382\u2013401.","journal-title":"Statistical Science"},{"key":"6272_CR12","unstructured":"Hsu, W. N., & Glass, J. (2018). Disentangling by partitioning: A representation learning framework for multimodal sensory data."},{"key":"6272_CR13","unstructured":"Jang, E., Gu, S., & Poole, B. (2017). Categorical reparameterization with gumbel-softmax. In 5th international conference on learning representations, ICLR 2017, Toulon, France, April 24-26, 2017, Conference Track Proceedings."},{"key":"6272_CR14","unstructured":"Kingma, D. P., & Ba, J. (2015). Adam: A method for stochastic optimization. In 3rd international conference on learning representations, ICLR 2015, San Diego, CA, USA, May 7-9, 2015, Conference Track Proceedings."},{"key":"6272_CR15","unstructured":"Kingma, D. P., & Welling, M. (2014). Auto-encoding variational bayes. In 2nd international conference on learning representations, ICLR 2014, Banff, AB, Canada, April 14-16, 2014, Conference Track Proceedings."},{"key":"6272_CR16","unstructured":"Liang, P. P., Lyu, Y., Fan, X., Wu, Z., Cheng, Y., Wu, J., Chen, L., Wu, P., Lee, M. A., Zhu, Y., Salakhutdinov, R., & Morency, L. (2021). Multibench: Multiscale benchmarks for multimodal representation learning. In Proceedings of the neural information processing systems track on datasets and benchmarks 1, NeurIPS Datasets and Benchmarks 2021, December 2021, virtual."},{"key":"6272_CR17","unstructured":"Maddison, C. J., Mnih, A., & Teh, Y. W. (2017). The concrete distribution: A continuous relaxation of discrete random variables. In 5th international conference on learning representations, ICLR 2017, Toulon, France, April 24-26, 2017, Conference Track Proceedings."},{"key":"6272_CR18","first-page":"132:1","volume":"21","author":"S Mohamed","year":"2020","unstructured":"Mohamed, S., Rosca, M., Figurnov, M., & Mnih, A. (2020). Monte Carlo gradient estimation in machine learning. JMLR, 21, 132:1-132:62.","journal-title":"JMLR"},{"key":"6272_CR19","first-page":"15718","volume":"32","author":"Y Shi","year":"2019","unstructured":"Shi, Y., Paige, B., & Torr, P. (2019). Variational mixture-of-experts autoencoders for multi-modal deep generative models. Advances in Neural Information Processing Systems, 32, 15718\u201315729.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"6272_CR20","unstructured":"Sutter, T. M., Daunhawer, I., & Vogt, J. E. (2020). Multimodal generative learning utilizing jensen-shannon-divergence. In Advances in neural information processing systems 33: Annual conference on neural information processing systems 2020, NeurIPS 2020, December 6-12, 2020, virtual."},{"key":"6272_CR21","unstructured":"Suzuki, M., Nakayama, K., & Matsuo, Y. (2017). Joint multimodal learning with deep generative models. In 5th international conference on learning representations, ICLR 2017, Toulon, France, April 24-26, 2017, Workshop Track Proceedings."},{"key":"6272_CR22","unstructured":"Tsai, Y. H., Liang, P. P., Zadeh, A., Morency, L., & Salakhutdinov, R. (2019). Learning factorized multimodal representations. In ICLR."},{"key":"6272_CR23","unstructured":"Vedantam, R., Fischer, I., Huang, J., & Murphy, K. (2018). Generative models of visually grounded imagination. In 6th international conference on learning representations, ICLR 2018, Vancouver, BC, Canada, April 30 - May 3, 2018, Conference Track Proceedings."},{"key":"6272_CR24","doi-asserted-by":"crossref","unstructured":"Vielzeuf, V., Lechervy, A., Pateux, S., & Jurie, F. (2018). Centralnet: A multilayer approach for multimodal fusion. CoRR, arxiv:1808.07275","DOI":"10.1007\/978-3-030-11024-6_44"},{"key":"6272_CR25","unstructured":"Wu, M., & Goodman, N. D. (2018). Multimodal generative models for scalable weakly-supervised learning. In Advances in neural information processing systems 31: Annual conference on neural information processing systems 2018, NeurIPS 2018, December 3-8, 2018, Montr\u00e9al, Canada (pp. 5580\u20135590)."},{"key":"6272_CR26","unstructured":"Zadeh, A., Zellers, R., Pincus, E., & Morency, L. (2016). MOSI: Multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos. CoRR, arXiv:1606.06259"}],"container-title":["Machine Learning"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-022-06272-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s10994-022-06272-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s10994-022-06272-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,11,7]],"date-time":"2023-11-07T01:04:36Z","timestamp":1699319076000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s10994-022-06272-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,11,7]]},"references-count":26,"journal-issue":{"issue":"12","published-print":{"date-parts":[[2022,12]]}},"alternative-id":["6272"],"URL":"https:\/\/doi.org\/10.1007\/s10994-022-06272-y","relation":{},"ISSN":["0885-6125","1573-0565"],"issn-type":[{"value":"0885-6125","type":"print"},{"value":"1573-0565","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,11,7]]},"assertion":[{"value":"24 November 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"25 August 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"16 October 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 November 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"All the authors declare no conflicts of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}},{"value":"Not applicable.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"Not applicable.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"Not applicable.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to publication"}},{"value":"This content has been made available to all.","name":"free","label":"Free to read"}]}}