{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T08:59:42Z","timestamp":1768294782491,"version":"3.49.0"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"9","license":[{"start":{"date-parts":[[2020,8,7]],"date-time":"2020-08-07T00:00:00Z","timestamp":1596758400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,8,7]],"date-time":"2020-08-07T00:00:00Z","timestamp":1596758400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Neural Comput &amp; Applic"],"published-print":{"date-parts":[[2021,5]]},"DOI":"10.1007\/s00521-020-05270-2","type":"journal-article","created":{"date-parts":[[2020,8,7]],"date-time":"2020-08-07T20:02:26Z","timestamp":1596830546000},"page":"4429-4444","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":35,"title":["Attribute-based regularization of latent spaces for variational auto-encoders"],"prefix":"10.1007","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8353-6358","authenticated-orcid":false,"given":"Ashis","family":"Pati","sequence":"first","affiliation":[]},{"given":"Alexander","family":"Lerch","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2020,8,7]]},"reference":[{"key":"5270_CR1","unstructured":"Adel T, Ghahramani Z, Weller A (2018) Discovering interpretable representations for both deep generative and discriminative models. In: 35th international conference on machine learning (ICML), Stockholm, Sweeden, pp 50\u201359"},{"key":"5270_CR2","doi-asserted-by":"crossref","unstructured":"Akuzawa K, Iwasawa Y, Matsuo Y (2018) Expressive speech synthesis via modeling expressions with variational autoencoder. In: 19th Interspeech, Graz, Austria","DOI":"10.21437\/Interspeech.2018-1113"},{"key":"5270_CR3","doi-asserted-by":"crossref","unstructured":"Aubry M, Maturana D, Efros AA, Russell BC, Sivic J (2014) Seeing 3D chairs: exemplar part-based 2D-3D alignment using a large dataset of CAD models. In: IEEE conference on computer vision and pattern recognition (CVPR), Columbus, Ohio, USA, pp 3762\u20133769","DOI":"10.1109\/CVPR.2014.487"},{"issue":"8","key":"5270_CR4","doi-asserted-by":"publisher","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio Y, Courville A, Vincent P (2013) Representation learning: a review and new perspectives. IEEE Trans Pattern Anal Mach Intell 35(8):1798\u20131828","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"5270_CR5","doi-asserted-by":"crossref","unstructured":"Bouchacourt D, Tomioka R, Nowozin S (2018) Multi-level variational autoencoder: learning disentangled representations from grouped observations. In: 32nd AAAI conference on artificial intelligence, New Orleans, USA","DOI":"10.1609\/aaai.v32i1.11867"},{"key":"5270_CR6","doi-asserted-by":"crossref","unstructured":"Bowman SR, Vilnis L, Vinyals O, Dai AM, Jozefowicz R, Bengio S (2016) Generating Sentences from a Continuous Space. In: SIGNLL conference on computational natural language learning, Berlin, Germany","DOI":"10.18653\/v1\/K16-1002"},{"key":"5270_CR7","unstructured":"Brunner G, Konrad A, Wang Y, Wattenhofer R (2018) MIDI-VAE: modeling dynamics and instrumentation of music with applications to style transfer. In: 19th international society for music information retrieval conference (ISMIR), Paris, France"},{"key":"5270_CR8","unstructured":"Burges C, Hyunjik K (2020) 3d-shapes dataset. https:\/\/github.com\/deepmind\/3d-shapes. Last accessed, 2nd April 2020"},{"key":"5270_CR9","unstructured":"Burgess CP, Higgins I, Pal A, Matthey L, Watters N, Desjardins G, Lerchner A (2018) Understanding disentangling in $\\beta $-VAE. arXiv:1804.03599 [cs, stat]"},{"issue":"12","key":"5270_CR10","doi-asserted-by":"publisher","first-page":"e9","DOI":"10.23915\/distill.00009","volume":"2","author":"S Carter","year":"2017","unstructured":"Carter S, Nielsen M (2017) Using artificial intelligence to augment human intelligence. Distill 2(12):e9. https:\/\/doi.org\/10.23915\/distill.00009","journal-title":"Distill"},{"key":"5270_CR11","first-page":"1","volume":"20","author":"DC Castro","year":"2019","unstructured":"Castro DC, Tan J, Kainz B, Konukoglu E, Glocker B (2019) Morpho-MNIST: quantitative assessment and diagnostics for representation learning. J Mach Learn Res 20:1\u201329","journal-title":"J Mach Learn Res"},{"key":"5270_CR12","unstructured":"Chen RTQ, Li X, Grosse R, Duvenaud D (2018) Isolating sources of disentanglement in variational autoencoders. In: Advances in neural information processing systems 31 (NeurIPS)"},{"key":"5270_CR13","unstructured":"Chen X, Duan Y, Houthooft R, Schulman J, Sutskever I, Abbeel P (2016) InfoGAN: interpretable representation learning by information maximizing generative adversarial nets. In: Advances in neural information processing systems 29 (NeurIPS), pp 2172\u20132180"},{"key":"5270_CR14","unstructured":"Cuthbert MS, Ariza C (2010) music21: a toolkit for computer-aided musicology and symbolic music data. In: 11th international society of music information retrieval conference (ISMIR), Utrecht, The Netherlands"},{"key":"5270_CR15","doi-asserted-by":"crossref","unstructured":"Dai Z, Yang Z, Yang Y, Carbonell J, Le QV, Salakhutdinov R (2019) Transformer-XL: attentive language models beyond a fixed-length context. In: Assoication of computational linguistics (ACL), Florence, Italy","DOI":"10.18653\/v1\/P19-1285"},{"key":"5270_CR16","unstructured":"Donahue C, Lipton ZC, Balsubramani A, McAuley J (2018) Semantically decomposing the latent spaces of generative adversarial networks. In: 6th international conference on learning representations (ICLR), Vancouver, Canada"},{"key":"5270_CR17","unstructured":"Eastwood C, Williams CKI (2018) A framework for the quantitative evaluation of disentangled representations. In: 6th international conference on learning representations (ICLR), Vancouver, Canada"},{"key":"5270_CR18","unstructured":"Engel J, Hoffman M, Roberts A (2017) Latent constraints: learning to generate conditionally from unconditional generative models. In: 5th international conference on learning representations (ICLR), Toulon, France"},{"key":"5270_CR19","doi-asserted-by":"crossref","unstructured":"Gatys LA, Ecker AS, Bethge M (2016) Image style transfer using convolutional neural networks. In: IEEE conference on computer vision and pattern recognition (CVPR), Las Vegas, USA, pp 2414\u20132423","DOI":"10.1109\/CVPR.2016.265"},{"key":"5270_CR20","unstructured":"Goodfellow I, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. In: Advances in neural information processing systems 27 (NeurIPS), pp 2672\u20132680"},{"key":"5270_CR21","doi-asserted-by":"crossref","unstructured":"Hadjeres G, Nielsen F, Pachet F (2017) GLSR-VAE: geodesic latent space regularization for variational autoencoder architectures. In: IEEE symposium series on computational intelligence (SSCI), Hawaii, USA, pp 1\u20137","DOI":"10.1109\/SSCI.2017.8280895"},{"key":"5270_CR22","unstructured":"Higgins I, Matthey L, Pal A, Burgess C, Glorot X, Botvinick MM, Mohamed S, Lerchner A (2017) $\\beta $-VAE: learning basic visual concepts with a constrained variational framework. In: 5th international conference on learning representations (ICLR), Toulon, France"},{"key":"5270_CR23","doi-asserted-by":"crossref","unstructured":"Hsu WN, Zhang Y, Glass J (2017) Learning latent representations for speech generation and transformation. In: 18th Interspeech, Stockholm, Sweeden","DOI":"10.21437\/Interspeech.2017-349"},{"key":"5270_CR24","unstructured":"Huang CZA, Vaswani A, Uszkoreit J, Simon I, Hawthorne C, Shazeer N, Dai AM, Hoffman MD, Dinculescu M, Eck D (2018) Music transformer: generating music with long-term structure. In: 6th international conference on learning representations (ICLR), Vancouver, Canada"},{"key":"5270_CR25","unstructured":"Jozefowicz R, Zaremba W, Sutskever I (2015) An empirical exploration of recurrent network architectures. In: 32nd international conference on machine learning (ICML), Lille, France"},{"key":"5270_CR26","unstructured":"Kim H, Mnih A (2018) Disentangling by factorising. In: 35th international conference on machine learning (ICML), Stockholm, Sweeden"},{"key":"5270_CR27","unstructured":"Kingma DP, Ba J (2015) Adam: a method for stochastic optimization. In: 3rd international conference on learning representations (ICLR), San Diego, USA"},{"key":"5270_CR28","unstructured":"Kingma DP, Welling M (2014) Auto-encoding variational Bayes. In: 2nd international conference on learning representations (ICLR), Banff, Canada"},{"key":"5270_CR29","unstructured":"Klambauer G, Unterthiner T, Mayr A, Hochreiter S (2017) Self-normalizing neural networks. In: Advances in neural information processing systems 30 (NeurIPS), pp 971\u2013980"},{"key":"5270_CR30","unstructured":"Kulkarni TD, Whitney WF, Kohli P, Tenenbaum J (2015) Deep convolutional inverse graphics network. In: Advances in neural information processing systems 28 (NeurIPS), pp 2539\u20132547"},{"issue":"1","key":"5270_CR31","doi-asserted-by":"publisher","first-page":"79","DOI":"10.1214\/aoms\/1177729694","volume":"22","author":"S Kullback","year":"1951","unstructured":"Kullback S, Leibler RA (1951) On information and sufficiency. Ann Math Stat 22(1):79\u201386","journal-title":"Ann Math Stat"},{"key":"5270_CR32","unstructured":"Kumar A, Sattigeri P, Balakrishnan A (2017) Variational inference of disentangled latent concepts from unlabeled observations. In: 5th international conference on learning representations (ICLR), Toulon, France"},{"key":"5270_CR33","unstructured":"Lample G, Zeghidour N, Usunier N, Bordes A, Denoyer L, Ranzato MA (2017) Fader networks: manipulating images by sliding attributes. In: Advances in neural information processing systems 30 (NeurIPS), pp 5967\u20135976"},{"key":"5270_CR34","doi-asserted-by":"crossref","unstructured":"Ledig C, Theis L, Huszar F, Caballero J, Cunningham A, Acosta A, Aitken A, Tejani A, Totz J, Wang Z, Shi W (2017) Photo-realistic single image super-resolution using a generative adversarial network. In: IEEE conference on computer vision and pattern recognition (CVPR), Hawaii, USA, pp 4681\u20134690","DOI":"10.1109\/CVPR.2017.19"},{"key":"5270_CR35","doi-asserted-by":"crossref","unstructured":"Liu Z, Luo P, Wang X, Tang X (2015) Deep learning face attributes in the wild. In: Proceedings of the IEEE international conference on computer vision (ICCV), Santiago, Chile, pp 3730\u20133738","DOI":"10.1109\/ICCV.2015.425"},{"key":"5270_CR36","unstructured":"Locatello F, Bauer S, Lucic M, R\u00e4tsch G, Gelly S, Sch\u00f6lkopf B, Bachem O (2019) Challenging common assumptions in the unsupervised learning of disentangled representations. In: 36th international conference on machine learning (ICML), Long Beach, California, USA"},{"key":"5270_CR37","unstructured":"Matthey L, Higgins I, Hassabis D, Lerchner A (2017) dSprites: disentanglement testing sprites dataset. https:\/\/github.com\/deepmind\/dsprites-dataset. Last accessed, 2nd April 2020"},{"key":"5270_CR38","unstructured":"Mikolov T, Sutskever I, Chen K, Corrado GS, Dean J (2013) Distributed representations of words and phrases and their compositionality. In: Advances in neural information processing systems 26 (NeurIPS), pp 3111\u20133119"},{"key":"5270_CR39","unstructured":"Mirza M, Osindero S (2014) Conditional generative adversarial nets. arXiv:1411.1784 [cs, stat]"},{"key":"5270_CR40","unstructured":"Pati A, Lerch A, Hadjeres G (2019) Learning to traverse latent spaces for musical score inpainting. In: 20th international society for music information retrieval conference (ISMIR), Delft, The Netherlands"},{"key":"5270_CR41","unstructured":"Razavi A, van\u00a0den Oord A, Vinyals O (2019) Generating diverse high-fidelity images with VQ-VAE-2. In: Advances in neural information processing systems 32 (NeurIPS), pp 14866\u201314876"},{"key":"5270_CR42","unstructured":"Reed SE, Zhang Y, Zhang Y, Lee H (2015) Deep visual analogy-making. In: Advances in neural information processing systems 28 (NeurIPS), pp 1252\u20131260"},{"key":"5270_CR43","unstructured":"Rezende DJ, Mohamed S (2015) Variational inference with normalizing flows. In: 32nd international conference on machine learning (ICML), Lille, France. ArXiv: 1505.05770"},{"key":"5270_CR44","unstructured":"Ridgeway K, Mozer MC (2018) Learning deep disentangled embeddings with the F-statistic loss. In: Advances in neural information processing systems 31 (NeurIPS), pp 185\u2013194"},{"key":"5270_CR45","unstructured":"Roberts A, Engel J, Oore S, Eck D (2018) Learning latent representations of music to generate interactive musical palettes. In: Intelligent user interfaces workshops (IUI), Tokyo, Japan"},{"key":"5270_CR46","unstructured":"Roberts A, Engel J, Raffel C, Hawthorne C, Eck D (2018) A hierarchical latent vector model for learning long-term structure in music. In: 35th international conference on machine learning (ICML), Stockholm, Sweeden"},{"key":"5270_CR47","unstructured":"Rubenstein P, Scholkopf B, Tolstikhin I (2018) Learning disentangled representations with wasserstein auto-encoders. In: 6th international conference on learning representations (ICLR), workshop track, Vancouver, Canada"},{"key":"5270_CR48","unstructured":"Sohn K, Lee H, Yan X (2015) Learning structured output representation using deep conditional generative models. In: Advances in neural information processing systems 28 (NeurIPS)"},{"key":"5270_CR49","unstructured":"Sturm BL, Santos JF, Ben-Tal O, Korshunova I (2016) Music transcription modelling and composition using deep learning. In: 1st international conference on computer simulation of musical creativity (CSMC), Huddersfield, UK"},{"key":"5270_CR50","unstructured":"Toussaint G (2002) A mathematical analysis of African, Brazilian and Cuban Clave rhythms. In: BRIDGES: mathematical connections in art, music and science, pp 157\u2013168"},{"key":"5270_CR51","unstructured":"van\u00a0den Oord A, Kalchbrenner N, Espeholt L, kavukcuoglu K, Vinyals O, Graves A (2016) Conditional image generation with PixelCNN decoders. In: Advances in neural information processing systems 29 (NeurIPS), pp 4790\u20134798"},{"key":"5270_CR52","doi-asserted-by":"crossref","unstructured":"Vincent P, Larochelle H, Bengio Y, Manzagol PA (2008) Extracting and composing robust features with denoising autoencoders. In: 25th international conference on machine learning (ICML), Helsinki, Finland, pp 1096\u20131103","DOI":"10.1145\/1390156.1390294"},{"key":"5270_CR53","unstructured":"Wang Y, Stanton D, Zhang Y, Skerry-Ryan RJ, Battenberg E, Shor J, Xiao Y, Ren F, Jia Y, Saurous RA (2018) Style tokens: unsupervised style modeling, control and transfer in end-to-end speech synthesis. In: 35th international conference on machine learning (ICML), Stockholm, Sweeden"},{"key":"5270_CR54","doi-asserted-by":"crossref","unstructured":"Yan X, Yang J, Sohn K, Lee H (2016) Attribute2Image: conditional image generation from visual attributes. In: Leibe B, Matas J, Sebe N, Welling M (eds) European conference for computer vision (ECCV), Amsterdam, The Netherlands, pp 776\u2013791","DOI":"10.1007\/978-3-319-46493-0_47"},{"key":"5270_CR55","unstructured":"Yang J, Reed SE, Yang MH, Lee H (2015) Weakly-supervised disentangling with recurrent transformations for 3D view synthesis. In: Advances in neural information processing systems 28 (NeurIPS), pp 1099\u20131107"},{"key":"5270_CR56","unstructured":"Zhang Y, Gan Z, Fan K, Chen Z, Henao R, Shen D, Carin L (2017) Adversarial feature matching for text generation. In: 34th international conference on machine learning (ICML), Sydney, Australia, pp 4006\u20134015"}],"container-title":["Neural Computing and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-020-05270-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00521-020-05270-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00521-020-05270-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,11,6]],"date-time":"2022-11-06T06:52:29Z","timestamp":1667717549000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00521-020-05270-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,8,7]]},"references-count":56,"journal-issue":{"issue":"9","published-print":{"date-parts":[[2021,5]]}},"alternative-id":["5270"],"URL":"https:\/\/doi.org\/10.1007\/s00521-020-05270-2","relation":{},"ISSN":["0941-0643","1433-3058"],"issn-type":[{"value":"0941-0643","type":"print"},{"value":"1433-3058","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,8,7]]},"assertion":[{"value":"13 April 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"28 July 2020","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"7 August 2020","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Compliance with ethical standards"}},{"value":"The authors declare that they have no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}