{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,7]],"date-time":"2026-05-07T16:23:34Z","timestamp":1778171014046,"version":"3.51.4"},"reference-count":71,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"2","license":[{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,2,1]],"date-time":"2023-02-01T00:00:00Z","timestamp":1675209600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"General Research Fund"},{"name":"Research Grants Council of Hong Kong","award":["RGC\/HKBU12202621"],"award-info":[{"award-number":["RGC\/HKBU12202621"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Pattern Anal. Mach. Intell."],"published-print":{"date-parts":[[2023,2,1]]},"DOI":"10.1109\/tpami.2022.3160509","type":"journal-article","created":{"date-parts":[[2022,3,21]],"date-time":"2022-03-21T20:31:45Z","timestamp":1647894705000},"page":"1949-1962","source":"Crossref","is-referenced-by-count":13,"title":["Learning Hierarchical Variational Autoencoders With Mutual Information Maximization for Autoregressive Sequence Modeling"],"prefix":"10.1109","volume":"45","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-0357-7633","authenticated-orcid":false,"given":"Dong","family":"Qian","sequence":"first","affiliation":[{"name":"Department of Computer Science, Hong Kong Baptist University, Kowloon Tong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7428-2050","authenticated-orcid":false,"given":"William K.","family":"Cheung","sequence":"additional","affiliation":[{"name":"Department of Computer Science, Hong Kong Baptist University, Kowloon Tong, Hong Kong"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1561\/2200000006"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.50"},{"key":"ref3","article-title":"Language models are unsupervised multitask learners","author":"Radford","year":"2019","journal-title":"OpenAI Blog"},{"key":"ref4","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"Brown"},{"key":"ref5","article-title":"Auto-encoding variational Bayes","volume-title":"Proc. 2nd Int. Conf. Learn. Representations","author":"Kingma"},{"key":"ref6","first-page":"1278","article-title":"Stochastic backpropagation and approximate inference in deep generative models","volume-title":"Proc. 31st Int. Conf. Mach. Learn.","author":"Rezende"},{"key":"ref7","first-page":"1530","article-title":"Variational inference with normalizing flows","volume-title":"Proc. 32nd Int. Conf. Mach. Learn.","author":"Rezende"},{"key":"ref8","article-title":"Importance weighted autoencoders","volume-title":"Proc. 4th Int. Conf. Learn. Representations","author":"Burda"},{"key":"ref9","first-page":"4743","article-title":"Improving variational inference with inverse autoregressive flow","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"Kingma"},{"key":"ref10","first-page":"1214","article-title":"VAE with a VampPrior","volume-title":"Proc. 21st Int. Conf. Artif. Intell. Statist.","author":"Tomczak"},{"key":"ref11","first-page":"6548","article-title":"BIVA: A very deep hierarchy of latent variables for generative modeling","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"Maal\u00f8e"},{"key":"ref12","first-page":"19 667","article-title":"NVAE: A deep hierarchical variational autoencoder","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"Vahdat"},{"key":"ref13","article-title":"Very deep VAEs generalize autoregressive models and can outperform them on images","volume-title":"Proc. 9th Int. Conf. Learn. Representations","author":"Child"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/K16-1002"},{"key":"ref15","first-page":"2172","article-title":"InfoGAN: Interpretable representation learning by information maximizing generative adversarial nets","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"Chen"},{"key":"ref16","first-page":"552","article-title":"Better mixing via deep representations","volume-title":"Proc. 30th Int. Conf. Mach. Learn.","author":"Bengio"},{"key":"ref17","first-page":"3738","article-title":"Ladder variational autoencoders","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"S\u00f8nderby"},{"key":"ref18","article-title":"PixelVAE: A latent variable model for natural images","volume-title":"Proc. 5th Int. Conf. Learn. Representations","author":"Gulrajani"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1416"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/n19-1021"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.350"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.378"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.acl-main.646"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1437"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i05.6413"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.73"},{"key":"ref27","first-page":"4348","article-title":"On adversarial mixup resynthesis","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"Beckham"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_36"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1370"},{"key":"ref30","first-page":"2397","article-title":"Avoiding latent variable collapse with generative skip models","volume-title":"Proc. 22nd Int. Conf. Artif. Intell. Statist.","author":"Dieng"},{"key":"ref31","article-title":"Lagging inference networks and posterior collapse in variational autoencoders","volume-title":"Proc. 7th Int. Conf. Learn. Representations","author":"He"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D17-1066"},{"key":"ref33","first-page":"3881","article-title":"Improved variational autoencoders for text modeling using dilated convolutions","volume-title":"Proc. 34th Int. Conf. Mach. Learn.","author":"Yang"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.findings-acl.11"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1995.7.5.889"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1126\/science.7761831"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P19-1200"},{"key":"ref38","article-title":"Progressive learning and disentanglement of hierarchical representations","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Li"},{"key":"ref39","first-page":"4091","article-title":"Learning hierarchical features from deep generative models","volume-title":"Proc. 34th Int. Conf. on Mach. Learn.","author":"Zhao"},{"key":"ref40","first-page":"530","article-title":"Mutual information neural estimation","volume-title":"Proc. 35th Int. Conf. Mach. Learn.","author":"Belghazi"},{"key":"ref41","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","volume-title":"Proc. 37th Int. Conf. Mach. Learn.","author":"Chen"},{"key":"ref42","first-page":"4182","article-title":"Data-efficient image recognition with contrastive predictive coding","volume-title":"Proc. 37th Int. Conf. on Mach. Learn.","author":"H\u00e9naff"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref44","article-title":"Learning deep representations by mutual information estimation and maximization","volume-title":"Proc. 7th Int. Conf. Learn. Representations","author":"Hjelm"},{"key":"ref45","first-page":"15 509","article-title":"Learning representations by maximizing mutual information across views","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"Bachman"},{"key":"ref46","article-title":"Representation learning with contrastive predictive coding","author":"van den Oord","year":"2018","journal-title":"Comput. Res. Repository"},{"key":"ref47","article-title":"A mutual information maximization perspective of language representation learning","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Kong"},{"key":"ref48","article-title":"Deep graph Infomax","volume-title":"Proc. 7th Int. Conf. Learn. Representations","author":"Veli\u010dkovi\u0107"},{"key":"ref49","article-title":"InfoGraph: Unsupervised and semi-supervised graph-level representation learning via mutual information maximization","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Sun"},{"key":"ref50","first-page":"875","article-title":"Formal limitations on the measurement of mutual information","volume-title":"Proc. 23rd Int. Conf. Artif. Intell. Statist.","author":"McAllester"},{"key":"ref51","article-title":"Understanding the limitations of variational mutual information estimators","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Song"},{"key":"ref52","article-title":"On mutual information maximization for representation learning","volume-title":"Proc. 8th Int. Conf. Learn. Representations","author":"Tschannen"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.3115\/v1\/d14-1179"},{"key":"ref55","first-page":"6000","article-title":"Attention is all you need","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"Vaswani"},{"key":"ref56","first-page":"4790","article-title":"Conditional image generation with PixelCNN decoders","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"van den Oord"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/TIT.2010.2068870"},{"issue":"2","key":"ref58","doi-asserted-by":"crossref","first-page":"183","DOI":"10.1023\/A:1007665907178","article-title":"An introduction to variational methods for graphical models","volume":"37","author":"Jordan","year":"1999","journal-title":"Mach. Learn."},{"key":"ref59","first-page":"15 578","article-title":"Wasserstein dependency measure for representation learning","volume-title":"Proc. Adv. Neural Informat. Process. Syst.","author":"Ozair"},{"key":"ref60","article-title":"Taming VAEs","author":"Rezende","year":"2018","journal-title":"Comput. Res. Repository"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.21236\/ADA273556"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/d15-1075"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1126\/science.aab3050"},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.425"},{"key":"ref65","article-title":"Learning multiple layers of features from tiny images","author":"Krizhevsky","year":"2009"},{"key":"ref66","article-title":"Variational lossy autoencoder","volume-title":"Proc. 5th Int. Conf. Learn. Representations","author":"Chen"},{"key":"ref67","first-page":"187","article-title":"KenLM: Faster and smaller language model queries","volume-title":"Proc. 6th Workshop Statist. Mach. Transl.","author":"Heafield"},{"key":"ref68","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2020.emnlp-main.448"},{"key":"ref69","first-page":"6438","article-title":"Manifold mixup: Better representations by interpolating hidden states","volume-title":"Proc. 36th Int. Conf. Mach. Learn.","author":"Verma"},{"key":"ref70","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"ref71","first-page":"1353","article-title":"Generative neural machine translation","volume-title":"Proc. Adv. Neural Inform. Process. Syst.","author":"Shah"}],"container-title":["IEEE Transactions on Pattern Analysis and Machine Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/34\/10008914\/09739027.pdf?arnumber=9739027","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,18]],"date-time":"2024-01-18T00:40:06Z","timestamp":1705538406000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9739027\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,1]]},"references-count":71,"journal-issue":{"issue":"2"},"URL":"https:\/\/doi.org\/10.1109\/tpami.2022.3160509","relation":{},"ISSN":["0162-8828","2160-9292","1939-3539"],"issn-type":[{"value":"0162-8828","type":"print"},{"value":"2160-9292","type":"electronic"},{"value":"1939-3539","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023,2,1]]}}}