{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T04:09:23Z","timestamp":1748664563111,"version":"3.41.0"},"publisher-location":"Cham","reference-count":57,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031926471","type":"print"},{"value":"9783031926488","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-92648-8_3","type":"book-chapter","created":{"date-parts":[[2025,5,30]],"date-time":"2025-05-30T16:28:17Z","timestamp":1748622497000},"page":"35-52","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Beyond the\u00a0Surface: A Comprehensive Analysis of\u00a0Implicit Bias in\u00a0Vision-Language Models"],"prefix":"10.1007","author":[{"given":"Giacomo","family":"Capitani","sequence":"first","affiliation":[]},{"given":"Alice","family":"Lucarini","sequence":"additional","affiliation":[]},{"given":"Lorenzo","family":"Bonicelli","sequence":"additional","affiliation":[]},{"given":"Federico","family":"Bolelli","sequence":"additional","affiliation":[]},{"given":"Simone","family":"Calderara","sequence":"additional","affiliation":[]},{"given":"Loris","family":"Vezzali","sequence":"additional","affiliation":[]},{"given":"Elisa","family":"Ficarra","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"3_CR1","unstructured":"Bai, X., Wang, A., Sucholutsky, I., Griffiths, T.L.: Measuring implicit bias in explicitly unbiased large language models. arXiv preprint arXiv:2402.04105 (2024)"},{"issue":"2","key":"3_CR2","doi-asserted-by":"publisher","first-page":"230","DOI":"10.1037\/0022-3514.71.2.230","volume":"71","author":"JA Bargh","year":"1996","unstructured":"Bargh, J.A., Chen, M., Burrows, L.: Automaticity of social behavior: direct effects of trait construct and stereotype activation on action. J. Pers. Soc. Psychol. 71(2), 230 (1996)","journal-title":"J. Pers. Soc. Psychol."},{"key":"3_CR3","doi-asserted-by":"crossref","unstructured":"Barraco, M., Stefanini, M., Cornia, M., Cascianelli, S., Baraldi, L., Cucchiara, R.: CaMEL: mean teacher learning for image captioning. In: 2022 26th International Conference on Pattern Recognition (ICPR), pp. 4087\u20134094. IEEE (2022)","DOI":"10.1109\/ICPR56361.2022.9955644"},{"issue":"2","key":"3_CR4","doi-asserted-by":"publisher","first-page":"300","DOI":"10.1037\/0022-3514.68.2.300","volume":"68","author":"CD Batson","year":"1995","unstructured":"Batson, C.D., Turk, C.L., Shaw, L.L., Klein, T.R.: Information function of empathic emotion: learning that we value the other\u2019s welfare. J. Pers. Soc. Psychol. 68(2), 300 (1995)","journal-title":"J. Pers. Soc. Psychol."},{"issue":"6334","key":"3_CR5","doi-asserted-by":"publisher","first-page":"183","DOI":"10.1126\/science.aal4230","volume":"356","author":"A Caliskan","year":"2017","unstructured":"Caliskan, A., Bryson, J.J., Narayanan, A.: Semantics derived automatically from language corpora contain human-like biases. Science 356(6334), 183\u2013186 (2017)","journal-title":"Science"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Capitani, G., Bolelli, F., Porrello, A., Calderara, S., Ficarra, E.: Clusterfix: a cluster-based debiasing approach without protected-group supervision. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 4870\u20134879 (2024)","DOI":"10.1109\/WACV57701.2024.00480"},{"key":"3_CR7","doi-asserted-by":"crossref","unstructured":"Changpinyo, S., Sharma, P., Ding, N., Soricut, R.: Conceptual 12M: pushing web-scale image-text pre-training to recognize long-tail visual concepts. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3558\u20133568 (2021)","DOI":"10.1109\/CVPR46437.2021.00356"},{"key":"3_CR8","doi-asserted-by":"crossref","unstructured":"Cherti, M., et al.: Reproducible scaling laws for contrastive language-image learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2818\u20132829 (2023)","DOI":"10.1109\/CVPR52729.2023.00276"},{"key":"3_CR9","unstructured":"Chuang, C.Y., Jampani, V., Li, Y., Torralba, A., Jegelka, S.: Debiasing vision-language models via biased prompts. arXiv preprint arXiv:2302.00070 (2023)"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Cohen, J.: Statistical Power Analysis for the Behavioral Sciences. Routledge (2013)","DOI":"10.4324\/9780203771587"},{"key":"3_CR11","unstructured":"Dehdashtian, S., Wang, L., Boddeti, V.N.: FairerCLIP: debiasing CLIP\u2019s zero-shot predictions using functions in RKHSs. arXiv preprint arXiv:2403.15593 (2024)"},{"key":"3_CR12","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth $$16\\,\\times \\,16$$ words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"issue":"2","key":"3_CR13","doi-asserted-by":"publisher","first-page":"77","DOI":"10.1016\/j.tics.2006.11.005","volume":"11","author":"ST Fiske","year":"2007","unstructured":"Fiske, S.T., Cuddy, A.J., Glick, P.: Universal dimensions of social cognition: warmth and competence. Trends Cogn. Sci. 11(2), 77\u201383 (2007)","journal-title":"Trends Cogn. Sci."},{"issue":"1","key":"3_CR14","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1186\/s12910-017-0179-8","volume":"18","author":"C FitzGerald","year":"2017","unstructured":"FitzGerald, C., Hurst, S.: Implicit bias in healthcare professionals: a systematic review. BMC Med. Ethics 18(1), 1\u201318 (2017)","journal-title":"BMC Med. Ethics"},{"key":"3_CR15","unstructured":"Frascaroli, E., Panariello, A., Buzzega, P., Bonicelli, L., Porrello, A., Calderara, S.: CLIP with generative latent replay: a strong baseline for incremental learning. In: Proceedings of 35th British Machine Vision Conference 2024 (BMVC) (2024)"},{"key":"3_CR16","unstructured":"Gadre, S.Y., et\u00a0al.: Datacomp: in search of the next generation of multimodal datasets. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"3_CR17","unstructured":"Geirhos, R., Meding, K., Wichmann, F.A.: Beyond accuracy: quantifying trial-by-trial behaviour of CNNs and humans by measuring error consistency. In: Advances in Neural Information Processing Systems, vol. 33, pp. 13890\u201313902 (2020)"},{"key":"3_CR18","unstructured":"Geirhos, R., Rubisch, P., Michaelis, C., Bethge, M., Wichmann, F.A., Brendel, W.: ImageNet-trained CNNs are biased towards texture; increasing shape bias improves accuracy and robustness. In: International Conference on Learning Representations (2019)"},{"issue":"1","key":"3_CR19","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1037\/0033-295X.102.1.4","volume":"102","author":"AG Greenwald","year":"1995","unstructured":"Greenwald, A.G., Banaji, M.R.: Implicit social cognition: attitudes, self-esteem, and stereotypes. Psychol. Rev. 102(1), 4 (1995)","journal-title":"Psychol. Rev."},{"key":"3_CR20","unstructured":"Grissom, R.J., Kim, J.J.: Effect Sizes for Research: A Broad Practical Approach. Lawrence Erlbaum Associates Publishers (2005)"},{"key":"3_CR21","doi-asserted-by":"crossref","unstructured":"Hambarde, K.A., Proenca, H.: Information retrieval: recent advances and beyond. IEEE Access (2023)","DOI":"10.1109\/ACCESS.2023.3295776"},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Hamilton, D.L.: Stereotyping and intergroup behavior: some thoughts on the cognitive approach. In: Cognitive Processes in Stereotyping and Intergroup Behavior, pp. 333\u2013353. Psychology Press (2015)","DOI":"10.4324\/9781315668758"},{"key":"3_CR23","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"5","key":"3_CR24","doi-asserted-by":"publisher","first-page":"859","DOI":"10.1037\/0022-3514.59.5.859","volume":"59","author":"DA Houston","year":"1990","unstructured":"Houston, D.A.: Empathy and the self: cognitive and emotional influences on the evaluation of negative affect in others. J. Pers. Soc. Psychol. 59(5), 859 (1990)","journal-title":"J. Pers. Soc. Psychol."},{"key":"3_CR25","unstructured":"Jeon, M., Lee, H., Seong, Y., Kang, M.: Learning without prejudices: continual unbiased learning via benign and malignant forgetting. In: The Eleventh International Conference on Learning Representations (2023)"},{"key":"3_CR26","doi-asserted-by":"publisher","first-page":"863","DOI":"10.3389\/fpsyg.2013.00863","volume":"4","author":"D Lakens","year":"2013","unstructured":"Lakens, D.: Calculating and reporting effect sizes to facilitate cumulative science: a practical primer for t-tests and anovas. Front. Psychol. 4, 863 (2013)","journal-title":"Front. Psychol."},{"key":"3_CR27","unstructured":"Lauren\u00e7on, H., et\u00a0al.: Obelics: an open web-scale filtered dataset of interleaved image-text documents. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"3_CR28","unstructured":"Li, X., Lipton, Z.C., Leqi, L.: Personalized language modeling from personalized human feedback. In: ICLR 2024 Workshop on Reliable and Responsible Foundation Models (2024)"},{"key":"3_CR29","doi-asserted-by":"crossref","unstructured":"Li, Z., Hoogs, A., Xu, C.: Discover and mitigate unknown biases with debiasing alternate networks. In: European Conference on Computer Vision, pp. 270\u2013288. Springer (2022)","DOI":"10.1007\/978-3-031-19778-9_16"},{"key":"3_CR30","unstructured":"Liu, E.Z., et al.: Just train twice: Improving group robustness without training group information. In: International Conference on Machine Learning, pp. 6781\u20136792. PMLR (2021)"},{"key":"3_CR31","doi-asserted-by":"crossref","unstructured":"Luo, Y., et\u00a0al.: FairCLIP: harnessing fairness in vision-language learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12289\u201312301 (2024)","DOI":"10.1109\/CVPR52733.2024.01168"},{"issue":"2","key":"3_CR32","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1037\/0033-2909.111.2.361","volume":"111","author":"KO McGraw","year":"1992","unstructured":"McGraw, K.O., Wong, S.P.: A common language effect size statistic. Psychol. Bull. 111(2), 361 (1992)","journal-title":"Psychol. Bull."},{"key":"3_CR33","unstructured":"Meng, Y., Xia, M., Chen, D.: Simpo: simple preference optimization with a reference-free reward. arXiv preprint arXiv:2405.14734 (2024)"},{"key":"3_CR34","unstructured":"Nam, J., Cha, H., Ahn, S., Lee, J., Shin, J.: Learning from failure: training debiased classifier from biased classifier. In: Advances in Neural Information Processing Systems, vol. 33, pp. 20673\u201320684 (2020)"},{"key":"3_CR35","unstructured":"Noble, S.U.: Algorithms of Oppression: How Search Engines Reinforce Racism. New York University Press (2018)"},{"key":"3_CR36","doi-asserted-by":"crossref","unstructured":"Oakden-Rayner, L., Dunnmon, J., Carneiro, G., R\u00e9, C.: Hidden stratification causes clinically meaningful failures in machine learning for medical imaging. In: Proceedings of the ACM Conference on Health, Inference, and Learning, pp. 151\u2013159 (2020)","DOI":"10.1145\/3368555.3384468"},{"issue":"6464","key":"3_CR37","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1126\/science.aax2342","volume":"366","author":"Z Obermeyer","year":"2019","unstructured":"Obermeyer, Z., Powers, B., Vogeli, C., Mullainathan, S.: Dissecting racial bias in an algorithm used to manage the health of populations. Science 366(6464), 447\u2013453 (2019)","journal-title":"Science"},{"issue":"3","key":"3_CR38","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1525\/aa.1964.66.3.02a00880","volume":"66","author":"CE Osgood","year":"1964","unstructured":"Osgood, C.E.: Semantic differential technique in the comparative study of cultures. Am. Anthropol. 66(3), 171\u2013200 (1964)","journal-title":"Am. Anthropol."},{"key":"3_CR39","doi-asserted-by":"crossref","unstructured":"Ponzio, F., Deodato, G., Macii, E., Di\u00a0Cataldo, S., Ficarra, E.: Exploiting \u201cuncertain\u201d deep networks for data cleaning in digital pathology. In: 2020 IEEE 17th International Symposium on Biomedical Imaging (ISBI), pp. 1139\u20131143. IEEE (2020)","DOI":"10.1109\/ISBI45749.2020.9098605"},{"key":"3_CR40","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"3_CR41","unstructured":"Rafailov, R., Sharma, A., Mitchell, E., Manning, C.D., Ermon, S., Finn, C.: Direct preference optimization: your language model is secretly a reward model. In: Advances in Neural Information Processing Systems, vol. 36 (2024)"},{"key":"3_CR42","unstructured":"Rahimian, H., Mehrotra, S.: Distributionally robust optimization: a review. arXiv preprint arXiv:1908.05659 (2019)"},{"key":"3_CR43","unstructured":"Sagawa, S., Koh, P.W., Hashimoto, T., Liang, P.: Distributionally robust neural networks. In: International Conference on Learning Representations (2020)"},{"key":"3_CR44","unstructured":"Sagawa, S., et\u00a0al.: Extending the wilds benchmark for unsupervised adaptation. arXiv preprint arXiv:2112.05090 (2021)"},{"key":"3_CR45","unstructured":"Sankaranarayanan, S., Hartvigsen, T., Oakden-Rayner, L., Ghassemi, M., Isola, P.: Real world relevance of generative counterfactual explanations. In: Workshop on Trustworthy and Socially Responsible Machine Learning, NeurIPS (2022)"},{"issue":"3","key":"3_CR46","doi-asserted-by":"publisher","first-page":"501","DOI":"10.1037\/0278-7393.13.3.501","volume":"13","author":"DL Schacter","year":"1987","unstructured":"Schacter, D.L.: Implicit memory: history and current status. J. Exp. Psychol. Learn. Mem. Cogn. 13(3), 501 (1987)","journal-title":"J. Exp. Psychol. Learn. Mem. Cogn."},{"key":"3_CR47","unstructured":"Schuhmann, C., et al.: Laion-5b: an open large-scale dataset for training next generation image-text models. In: Advances in Neural Information Processing Systems, vol. 35, pp. 25278\u201325294 (2022)"},{"key":"3_CR48","unstructured":"Schuhmann, C., et al.: Laion-400M: open dataset of clip-filtered 400 million image-text pairs. arXiv preprint arXiv:2111.02114 (2021)"},{"key":"3_CR49","doi-asserted-by":"crossref","unstructured":"Seo, S., Lee, J.Y., Han, B.: Unsupervised learning of debiased representations with pseudo-attributes. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 16742\u201316751 (2022)","DOI":"10.1109\/CVPR52688.2022.01624"},{"key":"3_CR50","unstructured":"Sohoni, N., Dunnmon, J., Angus, G., Gu, A., R\u00e9, C.: No subclass left behind: fine-grained robustness in coarse-grained classification problems. In: Advances in Neural Information Processing Systems, vol. 33, pp. 19339\u201319352 (2020)"},{"issue":"3","key":"3_CR51","doi-asserted-by":"publisher","first-page":"157","DOI":"10.1111\/j.1540-4560.1985.tb01134.x","volume":"41","author":"WG Stephan","year":"1985","unstructured":"Stephan, W.G., Stephan, C.W.: Intergroup anxiety. J. Soc. Issues 41(3), 157\u2013175 (1985)","journal-title":"J. Soc. Issues"},{"issue":"2","key":"3_CR52","doi-asserted-by":"publisher","first-page":"64","DOI":"10.1145\/2812802","volume":"59","author":"B Thomee","year":"2016","unstructured":"Thomee, B., et al.: YFCC100M: the new data in multimedia research. Commun. ACM 59(2), 64\u201373 (2016)","journal-title":"Commun. ACM"},{"issue":"5","key":"3_CR53","doi-asserted-by":"publisher","first-page":"423","DOI":"10.1002\/pits.20234","volume":"44","author":"B Thompson","year":"2007","unstructured":"Thompson, B.: Effect sizes, confidence intervals, and confidence intervals for effect sizes. Psychol. Sch. 44(5), 423\u2013432 (2007)","journal-title":"Psychol. Sch."},{"key":"3_CR54","doi-asserted-by":"crossref","unstructured":"Vieriu, R.L., Tulyakov, S., Semeniuta, S., Sangineto, E., Sebe, N.: Facial expression recognition under a wide range of head poses. In: 2015 11th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG), vol.\u00a01, pp.\u00a01\u20137. IEEE (2015)","DOI":"10.1109\/FG.2015.7163098"},{"key":"3_CR55","unstructured":"Xu, H., et al.: Demystifying clip data. arXiv preprint arXiv:2309.16671 (2023)"},{"key":"3_CR56","doi-asserted-by":"crossref","unstructured":"Yang, J.C., Korecki, M., Dailisan, D., Hausladen, C.I., Helbing, D.: LLM voting: human choices and AI collective decision making. arXiv preprint arXiv:2402.01766 (2024)","DOI":"10.1609\/aies.v7i1.31758"},{"key":"3_CR57","unstructured":"Zhang, M., Sohoni, N.S., Zhang, H.R., Finn, C., R\u00e9, C.: Correct-n-contrast: a contrastive approach for improving robustness to spurious correlations. arXiv preprint arXiv:2203.01517 (2022)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-92648-8_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,30]],"date-time":"2025-05-30T16:28:36Z","timestamp":1748622516000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-92648-8_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031926471","9783031926488"],"references-count":57,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-92648-8_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}