{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,2]],"date-time":"2025-08-02T19:14:35Z","timestamp":1754162075597,"version":"3.41.2"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2019,5,1]],"date-time":"2019-05-01T00:00:00Z","timestamp":1556668800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2019,5]]},"DOI":"10.1109\/icassp.2019.8683370","type":"proceedings-article","created":{"date-parts":[[2019,4,17]],"date-time":"2019-04-17T16:01:56Z","timestamp":1555516916000},"page":"4010-4014","source":"Crossref","is-referenced-by-count":0,"title":["Learning Disentangled Representation in Latent Stochastic Models: A Case Study with Image Captioning"],"prefix":"10.1109","author":[{"given":"Nidhi","family":"Vyas","sequence":"first","affiliation":[{"name":"Language Technologies Institute, Carnegie Mellon University, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"SaiKrishna","family":"Rallabandi","sequence":"additional","affiliation":[{"name":"Language Technologies Institute, Carnegie Mellon University, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lalitesh","family":"Morishetti","sequence":"additional","affiliation":[{"name":"Language Technologies Institute, Carnegie Mellon University, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Eduard","family":"Hovy","sequence":"additional","affiliation":[{"name":"Language Technologies Institute, Carnegie Mellon University, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alan W","family":"Black","sequence":"additional","affiliation":[{"name":"Language Technologies Institute, Carnegie Mellon University, PA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref31","article-title":"Microsoft coco captions: Data collection and evaluation server","author":"chen","year":"2015","journal-title":"arXiv preprint arXiv 1504 00325"},{"key":"ref30","article-title":"The concrete distribution: A continuous relaxation of discrete random variables","author":"maddison","year":"2016","journal-title":"arXiv preprint arXiv 1611 00712"},{"key":"ref10","article-title":"Show-andfool: Crafting adversarial examples for neural image captioning","volume":"abs 1712 2051","author":"chen","year":"2017","journal-title":"CoRR"},{"key":"ref11","article-title":"Adversarial attacks and defences: A survey","volume":"abs 1810 69","author":"chakraborty","year":"2018","journal-title":"CoRR"},{"key":"ref12","article-title":"Practical adversarial attack against object detector","volume":"abs 1812 10217","author":"zhao","year":"2018","journal-title":"CoRR"},{"key":"ref13","article-title":"Adversarial examples: Attacks and defenses for deep learning","volume":"abs 1712 7107","author":"yuan","year":"2017","journal-title":"CoRR"},{"key":"ref14","first-page":"2980","article-title":"A recurrent latent variable model for sequential data","author":"chung","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref15","first-page":"4743","article-title":"Improved variational inference with inverse autoregressive flow","author":"kingma","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref16","article-title":"Variational lossy autoencoder","author":"chen","year":"2016","journal-title":"arXiv preprint arXiv 1611 02731"},{"key":"ref17","article-title":"Unsupervised feature learning and deep learning: A review and new perspectives","volume":"abs 1206 5538","author":"bengio","year":"2012","journal-title":"CoRR"},{"key":"ref18","article-title":"Hierarchical disentangled representations","author":"esmaeili","year":"2018","journal-title":"arXiv preprint arXiv 1804 02671"},{"key":"ref19","first-page":"1480","article-title":"Darla: Improving zero-shot transfer in reinforcement learning","volume":"70","author":"higgins","year":"0","journal-title":"Proc 34th Int Conf Mach Learn"},{"key":"ref28","article-title":"Understanding disentangling in beta-vae","author":"burgess","year":"2018","journal-title":"arXiv preprint arXiv 1804 02671"},{"key":"ref4","article-title":"Unsupervised multi-modal neural machine translation","volume":"abs 1811 11365","author":"su","year":"2018","journal-title":"CoRR"},{"journal-title":"International Conference on Learning Representations","article-title":"beta-vae: Learning basic visual concepts with a constrained variational framework","year":"2017","key":"ref27"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1145\/3078971.3079000"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1024"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref5","article-title":"Multimodal ma-? chine learning: A survey and taxonomy","author":"baltrusaitis","year":"2018","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref8","first-page":"4971","article-title":"Dont just assume; look and answer: Overcoming priors for visual question answering","author":"agrawal","year":"2018","journal-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.670"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.279"},{"key":"ref9","article-title":"C-vqa: A compositional split of the visual question answering (vqa) v1. 0 dataset","author":"agrawal","year":"2017","journal-title":"arXiv preprint arXiv 1704 08243"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2889052"},{"key":"ref20","article-title":"The mythos of model interpretability","author":"lipton","year":"2016","journal-title":"arXiv preprint arXiv 1606 03490"},{"key":"ref22","article-title":"Generating sentences from a continuous space","author":"bowman","year":"2015","journal-title":"arXiv preprint arXiv 1511 06349"},{"key":"ref21","article-title":"Auto-encoding variational bayes","author":"kingma","year":"2013","journal-title":"arXiv preprint arXiv 1312 6114"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/P17-1061"},{"key":"ref23","article-title":"Multi-space variational encoderdecoders for semi-supervised labeled sequence transduction","author":"zhou","year":"2017","journal-title":"arXiv preprint arXiv 1704 01691"},{"key":"ref26","first-page":"1975","article-title":"Pixelgan autoencoders","author":"makhzani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref25","article-title":"Improving variational encoder-decoders in dialogue generation","author":"shen","year":"2018","journal-title":"arXiv preprint arxiv 1802 05807"}],"event":{"name":"ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","start":{"date-parts":[[2019,5,12]]},"location":"Brighton, UK","end":{"date-parts":[[2019,5,17]]}},"container-title":["ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/8671773\/8682151\/08683370.pdf?arnumber=8683370","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,7,29]],"date-time":"2025-07-29T18:20:57Z","timestamp":1753813257000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/8683370\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,5]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/icassp.2019.8683370","relation":{},"subject":[],"published":{"date-parts":[[2019,5]]}}}