{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,27]],"date-time":"2025-03-27T16:00:55Z","timestamp":1743091255670,"version":"3.40.3"},"publisher-location":"Cham","reference-count":60,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031732256"},{"type":"electronic","value":"9783031732263"}],"license":[{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,1]],"date-time":"2024-11-01T00:00:00Z","timestamp":1730419200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73226-3_7","type":"book-chapter","created":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T15:02:57Z","timestamp":1730386977000},"page":"110-126","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Sequential Representation Learning via\u00a0Static-Dynamic Conditional Disentanglement"],"prefix":"10.1007","author":[{"given":"Mathieu Cyrille","family":"Simon","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-4010-714X","authenticated-orcid":false,"given":"Pascal","family":"Frossard","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5049-2929","authenticated-orcid":false,"given":"Christophe De","family":"Vleeschouwer","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,1]]},"reference":[{"key":"7_CR1","unstructured":"Agrawal, S., Dukkipati, A.: Deep variational inference without pixel-wise reconstruction. arXiv preprint arXiv:1611.05209 (2016)"},{"key":"7_CR2","unstructured":"Aifanti, N., Papachristou, C., Delopoulos, A.: The MUG facial expression database. In: International Workshop on Image Analysis for Multimedia Interactive Services (2010)"},{"key":"7_CR3","doi-asserted-by":"crossref","unstructured":"Albarracin, J.F.H., Rivera, A.R.: Video reenactment as inductive bias for content-motion disentanglement. In: IEEE TIP (2022)","DOI":"10.1109\/TIP.2022.3153140"},{"key":"7_CR4","unstructured":"Bai, J., Wang, W., Gomes, C.P.: Contrastively disentangled sequential variational autoencoder. In: NeurIPS (2021)"},{"key":"7_CR5","doi-asserted-by":"crossref","unstructured":"Bengio, Y., Courville, A., Vincent, P.: Representation learning: A review and new perspectives. In: IEEE TPAMI (2013)","DOI":"10.1109\/TPAMI.2013.50"},{"key":"7_CR6","unstructured":"Berman, N., Naiman, I., Azencot, O.: Multifactor sequential disentanglement via structured Koopman autoencoders. arXiv preprint arXiv:2303.17264 (2023)"},{"key":"7_CR7","doi-asserted-by":"crossref","unstructured":"Bouchacourt, D., Tomioka, R., Nowozin, S.: Multi-level variational autoencoder: learning disentangled representations from grouped observations. In: AAAI (2018)","DOI":"10.1609\/aaai.v32i1.11867"},{"key":"7_CR8","unstructured":"Brehmer, J., De\u00a0Haan, P., Lippe, P., Cohen, T.S.: Weakly supervised causal representation learning. In: NeurIPS (2022)"},{"key":"7_CR9","doi-asserted-by":"crossref","unstructured":"Chen, C., Jafari, R., Kehtarnavaz, N.: UTD-MHAD: a multimodal dataset for human action recognition utilizing a depth camera and a wearable inertial sensor. In: ICIP (2015)","DOI":"10.1109\/ICIP.2015.7350781"},{"key":"7_CR10","unstructured":"Chen, R.T., Li, X., Grosse, R.B., Duvenaud, D.K.: Isolating sources of disentanglement in variational autoencoders. In: NeurIPS (2018)"},{"key":"7_CR11","unstructured":"Chen, X., et al.: Variational lossy autoencoder. arXiv preprint arXiv:1611.02731 (2016)"},{"key":"7_CR12","unstructured":"Denton, E.L., et\u00a0al.: Unsupervised learning of disentangled representations from video. In: NeurIPS (2017)"},{"key":"7_CR13","unstructured":"Dinh, L., Krueger, D., Bengio, Y.: NICE: Non-linear independent components estimation. arXiv preprint arXiv:1410.8516 (2014)"},{"key":"7_CR14","doi-asserted-by":"crossref","unstructured":"Fragemann, J., Ardizzone, L., Egger, J., Kleesiek, J.: Review of disentanglement approaches for medical applications\u2013towards solving the Gordian knot of generative models in healthcare. arXiv preprint arXiv:2203.11132 (2022)","DOI":"10.36227\/techrxiv.19364897"},{"key":"7_CR15","unstructured":"Gabbay, A., Hoshen, Y.: Demystifying inter-class disentanglement. arXiv preprint arXiv:1906.11796 (2019)"},{"key":"7_CR16","unstructured":"Garnelo, M., et al.: Neural processes. arXiv preprint arXiv:1807.01622 (2018)"},{"key":"7_CR17","unstructured":"Gondal, M.W., et al.: On the transfer of inductive bias from simulation to the real world: a new disentanglement dataset. In: NeurIPS (2019)"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Haga, T., Kera, H., Kawamoto, K.: Sequential variational autoencoder with adversarial classifier for video disentanglement. Sensors 23(5), 2515 (2023)","DOI":"10.3390\/s23052515"},{"key":"7_CR19","unstructured":"Han, J., Min, M.R., Han, L., Li, L.E., Zhang, X.: Disentangled recurrent wasserstein autoencoder. arXiv preprint arXiv:2101.07496 (2021)"},{"key":"7_CR20","unstructured":"Higgins, I., et al.: beta-VAE: learning basic visual concepts with a constrained variational framework. In: ICLR (2016)"},{"key":"7_CR21","doi-asserted-by":"crossref","unstructured":"Hsu, W.N., Glass, J.: Scalable factorized hierarchical variational autoencoder training. arXiv preprint arXiv:1804.03201 (2018)","DOI":"10.21437\/Interspeech.2018-1034"},{"key":"7_CR22","unstructured":"Hsu, W.N., Zhang, Y., Glass, J.: Unsupervised learning of disentangled and interpretable representations from sequential data. In: NeurIPS (2017)"},{"key":"7_CR23","unstructured":"Huang, C.W., Krueger, D., Lacoste, A., Courville, A.: Neural autoregressive flows. In: ICML (2018)"},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aila, T.: A style-based generator architecture for generative adversarial networks. In: CVPR (2019)","DOI":"10.1109\/CVPR.2019.00453"},{"key":"7_CR25","doi-asserted-by":"crossref","unstructured":"Karras, T., Laine, S., Aittala, M., Hellsten, J., Lehtinen, J., Aila, T.: Analyzing and improving the image quality of styleGAN. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00813"},{"key":"7_CR26","unstructured":"Kim, H., Mnih, A.: Disentangling by factorising. In: ICML (2018)"},{"key":"7_CR27","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes. arXiv preprint arXiv:1312.6114 (2013)"},{"key":"7_CR28","unstructured":"Kingma, D.P., Salimans, T., Jozefowicz, R., Chen, X., Sutskever, I., Welling, M.: Improved variational inference with inverse autoregressive flow. In: NeurIPS (2016)"},{"key":"7_CR29","unstructured":"Li, Y., Mandt, S.: Disentangled sequential autoencoder. arXiv preprint arXiv:1803.02991 (2018)"},{"key":"7_CR30","unstructured":"Lippe, P., Magliacane, S., L\u00f6we, S., Asano, Y.M., Cohen, T., Gavves, S.: CITRIS: causal identifiability from temporal intervened sequences. In: ICML (2022)"},{"key":"7_CR31","doi-asserted-by":"crossref","unstructured":"Liu, X., Sanchez, P., Thermos, S., O\u2019Neil, A.Q., Tsaftaris, S.A.: Learning disentangled representations in the imaging domain. Med. Image Anal. 80, 102516 (2022)","DOI":"10.1016\/j.media.2022.102516"},{"key":"7_CR32","unstructured":"Locatello, F., et al.: Challenging common assumptions in the unsupervised learning of disentangled representations. In: ICML (2019)"},{"key":"7_CR33","unstructured":"Locatello, F., Poole, B., R\u00e4tsch, G., Sch\u00f6lkopf, B., Bachem, O., Tschannen, M.: Weakly-supervised disentanglement without compromises. In: ICML (2020)"},{"key":"7_CR34","unstructured":"Locatello, F., Tschannen, M., Bauer, S., R\u00e4tsch, G., Sch\u00f6lkopf, B., Bachem, O.: Disentangling factors of variation using few labels. arXiv preprint arXiv:1905.01258 (2019)"},{"key":"7_CR35","doi-asserted-by":"crossref","unstructured":"Luo, Y.J., Ewert, S., Dixon, S.: Towards robust unsupervised disentanglement of sequential data\u2013a case study using music audio. arXiv preprint arXiv:2205.05871 (2022)","DOI":"10.24963\/ijcai.2022\/458"},{"key":"7_CR36","unstructured":"Ma, X., Kong, X., Zhang, S., Hovy, E.: Decoupling global and local representations via invertible generative flows. arXiv preprint arXiv:2004.11820 (2020)"},{"key":"7_CR37","doi-asserted-by":"crossref","unstructured":"Marino, J., Chen, L., He, J., Mandt, S.: Improving sequential latent variable models with autoregressive flows. In: Symposium on Advances in Approximate Bayesian Inference (2020)","DOI":"10.1007\/s10994-021-06092-6"},{"key":"7_CR38","unstructured":"Matthey, L., Higgins, I., Hassabis, D., Lerchner, A.: dSprites: Disentanglement testing sprites dataset. https:\/\/github.com\/deepmind\/dsprites-dataset\/ (2017)"},{"key":"7_CR39","unstructured":"Mita, G., Filippone, M., Michiardi, P.: An identifiable double VAE for disentangled representations. In: ICML (2021)"},{"key":"7_CR40","unstructured":"Morrow, R., Chiu, W.C.: Variational autoencoders with normalizing flow decoders. arXiv preprint arXiv:2004.05617 (2020)"},{"key":"7_CR41","unstructured":"Naiman, I., Berman, N., Azencot, O.: Sample and predict your latent: Modality-free sequential disentanglement via contrastive estimation. arXiv preprint arXiv:2305.15924 (2023)"},{"key":"7_CR42","unstructured":"Reed, S.E., Zhang, Y., Zhang, Y., Lee, H.: Deep visual analogy-making. NeurIPS (2015)"},{"key":"7_CR43","unstructured":"Rezende, D., Mohamed, S.: Variational inference with normalizing flows. In: ICML (2015)"},{"key":"7_CR44","unstructured":"Tian, Y., et al.: A good image generator is what you need for high-resolution video synthesis. arXiv preprint arXiv:2104.15069 (2021)"},{"key":"7_CR45","unstructured":"Tonekaboni, S., Li, C.L., Arik, S.O., Goldenberg, A., Pfister, T.: Decoupling local and global representations of time series. In: International Conference on Artificial Intelligence and Statistics (2022)"},{"key":"7_CR46","doi-asserted-by":"crossref","unstructured":"Tulyakov, S., Liu, M.Y., Yang, X., Kautz, J.: MoCoGAN: decomposing motion and content for video generation. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00165"},{"key":"7_CR47","unstructured":"Vahdat, A., Kautz, J.: NVAE: a deep hierarchical variational autoencoder. In: NeurIPS (2020)"},{"key":"7_CR48","unstructured":"Villegas, R., Yang, J., Hong, S., Lin, X., Lee, H.: Decomposing motion and content for natural video sequence prediction. arXiv preprint arXiv:1706.08033 (2017)"},{"key":"7_CR49","unstructured":"Von\u00a0K\u00fcgelgen, J., et al.: Self-supervised learning with data augmentations provably isolates content from style. In: NeurIPS (2021)"},{"key":"7_CR50","doi-asserted-by":"crossref","unstructured":"Vural, E., Frossard, P.: Learning pattern transformation manifolds for classification. In: ICIP (2012)","DOI":"10.1109\/ICIP.2012.6467072"},{"key":"7_CR51","unstructured":"Wang, X., Chen, H., Tang, S., Wu, Z., Zhu, W.: Disentangled representation learning. arXiv preprint arXiv:2211.11695 (2022)"},{"key":"7_CR52","doi-asserted-by":"crossref","unstructured":"Wang, Y., Bilinski, P., Bremond, F., Dantcheva, A.: G3AN: disentangling appearance and motion for video generation. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00531"},{"key":"7_CR53","unstructured":"Winkler, C., Worrall, D., Hoogeboom, E., Welling, M.: Learning likelihoods with conditional normalizing flows. arXiv preprint arXiv:1912.00042 (2019)"},{"key":"7_CR54","unstructured":"Yang, M., Liu, F., Chen, Z., Shen, X., Hao, J., Wang, J.: CausalVAE: Structured causal disentanglement in variational autoencoder. arXiv preprint arXiv:2004.08697 (2020)"},{"key":"7_CR55","doi-asserted-by":"crossref","unstructured":"Yang, M., Liu, F., Chen, Z., Shen, X., Hao, J., Wang, J.: CausalVAE: disentangled representation learning via neural structural causal models. In: CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00947"},{"key":"7_CR56","doi-asserted-by":"crossref","unstructured":"Ye, X., Bilodeau, G.A.: A unified model for continuous conditional video prediction. In: CVPR (2023)","DOI":"10.1109\/CVPRW59228.2023.00368"},{"key":"7_CR57","unstructured":"Yin, D., Ren, X., Luo, C., Wang, Y., Xiong, Z., Zeng, W.: Retriever: Learning content-style representation as a token-level bipartite graph. arXiv preprint arXiv:2202.12307 (2022)"},{"key":"7_CR58","unstructured":"Zhao, S., Song, J., Ermon, S.: Towards deeper understanding of variational autoencoding models. arXiv preprint arXiv:1702.08658 (2017)"},{"key":"7_CR59","unstructured":"Zhu, X., Xu, C., Tao, D.: Commutative lie group VAE for disentanglement learning. ICML (2021)"},{"key":"7_CR60","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Min, M.R., Kadav, A., Graf, H.P.: S3VAE: self-supervised sequential VAE for representation disentanglement and data generation. In: CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00657"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73226-3_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,31]],"date-time":"2024-10-31T15:13:25Z","timestamp":1730387605000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73226-3_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,1]]},"ISBN":["9783031732256","9783031732263"],"references-count":60,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73226-3_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,11,1]]},"assertion":[{"value":"1 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}