{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,4]],"date-time":"2026-04-04T19:07:41Z","timestamp":1775329661396,"version":"3.50.1"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031730238","type":"print"},{"value":"9783031730245","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,24]],"date-time":"2024-11-24T00:00:00Z","timestamp":1732406400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-73024-5_3","type":"book-chapter","created":{"date-parts":[[2024,11,25]],"date-time":"2024-11-25T16:40:25Z","timestamp":1732552825000},"page":"35-50","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["Deciphering the\u00a0Role of\u00a0Representation Disentanglement: Investigating Compositional Generalization in\u00a0CLIP Models"],"prefix":"10.1007","author":[{"given":"Reza","family":"Abbasi","sequence":"first","affiliation":[]},{"given":"Mohammad Hossein","family":"Rohban","sequence":"additional","affiliation":[]},{"given":"Mahdieh Soleymani","family":"Baghshah","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,24]]},"reference":[{"key":"3_CR1","unstructured":"Liu, J., et al.: Towards out-of-distribution generalization: a survey. arXiv preprint arXiv:2108.13624 (2021)"},{"key":"3_CR2","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"3_CR3","unstructured":"Fang, A., et al.: Data determines distributional robustness in contrastive language image pre-training (CLIP). In: International Conference on Machine Learning, pp. 6216\u20136234. PMLR (2022)"},{"key":"3_CR4","unstructured":"Nguyen, T., Ilharco, G., Wortsman, M., Oh, S., Schmidt, L.: Quality not quantity: on the interaction between dataset design and robustness of CLIP. arXiv preprint arXiv:2208.05516 (2022)"},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Wang, Z., Hershcovich, D.: On evaluating multilingual compositional generalization with translated datasets. In: Rogers, A., Boyd-Graber, J., Okazaki, N. (eds.) Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), , Toronto, Canada, pp. 1669\u20131687. Association for Computational Linguistics (2023)","DOI":"10.18653\/v1\/2023.acl-long.93"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Shaw, P., Chang, M.-W., Pasupat, P., Toutanova, K.: Compositional generalization and natural language variation: can a semantic parsing approach handle both? In: Zong, C., Xia, F., Li, W., Navigli, R., (eds.) Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), pp. 922\u2013938. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.acl-long.75"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Mehta, S.V., Rao, J., Tay, Y., Kale, M., Parikh, A.P., Strubell, E.: Improving compositional generalization with self-training for data-to-text generation. arXiv preprint arXiv:2110.08467 (2021)","DOI":"10.18653\/v1\/2022.acl-long.289"},{"key":"3_CR8","unstructured":"Yuksekgonul, M., Bianchi, F., Kalluri, P., Jurafsky, D., Zou, J.: When and why vision-language models behave like bags-of-words, and what to do about it? In: The Eleventh International Conference on Learning Representations (2023)"},{"key":"3_CR9","unstructured":"Lewis, M., et al.: Does clip bind concepts? Probing compositionality in large image models (2023)"},{"key":"3_CR10","unstructured":"Zhao, T., et al.: VL-CheckList: evaluating pre-trained vision-language models with objects, attributes and relations (2023)"},{"key":"3_CR11","doi-asserted-by":"crossref","unstructured":"Thrush, T., et al.: WinoGround: probing vision and language models for visio-linguistic compositionality (2022)","DOI":"10.1109\/CVPR52688.2022.00517"},{"key":"3_CR12","unstructured":"Ossowski, T., Jiang, M., Hu, J.: Prompting large vision-language models for compositional reasoning (2024)"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Zhang, J., Cai, M., Xie, T., Lee, Y.J.: CounterCurate: enhancing physical and semantic visio-linguistic compositional reasoning via counterfactual examples (2024)","DOI":"10.18653\/v1\/2024.findings-acl.915"},{"key":"3_CR14","unstructured":"Wang, H., Si, H., Shao, H., Zhao, H.: Enhancing compositional generalization via compositional feature alignment (2024)"},{"key":"3_CR15","unstructured":"Doveh, S., et al.: Dense and aligned captions (DAC) promote compositional reasoning in VL models (2023)"},{"key":"3_CR16","doi-asserted-by":"crossref","unstructured":"Pham, K., et al.: Learning to predict visual attributes in the wild (2021)","DOI":"10.1109\/CVPR46437.2021.01282"},{"key":"3_CR17","unstructured":"Douze, M., et al.: The faiss library (2024)"},{"key":"3_CR18","unstructured":"Ilharco, G., et al.: OpenCLIP (2021)"},{"key":"3_CR19","unstructured":"Yang, T., Wang, Y., Lan, C., Yan, L., Zheng, N.: Vector-based representation is the key: a study on disentanglement and compositional generalization (2023)"},{"key":"3_CR20","unstructured":"Montero, M.L., Ludwig, C.J.H., Costa, R.P., Malhotra, G., Bowers, J.: The role of disentanglement in generalisation. In: International Conference on Learning Representations (2021)"},{"key":"3_CR21","unstructured":"Xu, Z., Niethammer, M., Raffel, C.A.: Compositional generalization in unsupervised compositional representation learning: A study on disentanglement and emergent language. In: Advances in Neural Information Processing Systems, vol. 35, pp. 25074\u201325087 (2022)"},{"key":"3_CR22","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: International Conference on Machine Learning, pp. 1597\u20131607. PMLR (2020)"},{"key":"3_CR23","unstructured":"Higgins, I., et al.: beta-VAE: learning basic visual concepts with a constrained variational framework. In: International Conference on Learning Representations (2016)"},{"key":"3_CR24","unstructured":"Eastwood, C., Williams, C.K.I.: A framework for the quantitative evaluation of disentangled representations. In: International Conference on Learning Representations (2018)"},{"key":"3_CR25","unstructured":"Ridgeway, K., Mozer, M.C.: Learning deep disentangled embeddings with the F-statistic loss. In: Advances in Neural Information Processing Systems, vol. 31 (2018)"},{"key":"3_CR26","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International Conference on Machine Learning, pp. 4904\u20134916. PMLR (2021)"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Wang, X., Chen, H., Tang, S., Wu, Z., Zhu, W.: Disentangled representation learning (2023)","DOI":"10.1145\/3581783.3613859"},{"key":"3_CR28","unstructured":"Li, Y., Mandt, S.: Disentangled sequential autoencoder (2018)"},{"key":"3_CR29","unstructured":"Burgess, C., Kim, H.: 3D shapes dataset (2018). https:\/\/github.com\/deepmind\/3dshapes-dataset\/"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-73024-5_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,25]],"date-time":"2024-11-25T17:04:30Z","timestamp":1732554270000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-73024-5_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,24]]},"ISBN":["9783031730238","9783031730245"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-73024-5_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,24]]},"assertion":[{"value":"24 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}