{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:59:00Z","timestamp":1767322740930,"version":"3.48.0"},"publisher-location":"Cham","reference-count":29,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783032113160"},{"type":"electronic","value":"9783032113177"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-11317-7_7","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:53:58Z","timestamp":1767322438000},"page":"77-88","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["A Pilot Study Exploring the\u00a0Alignment of\u00a0Humans and\u00a0CNN During Perception of\u00a0Social Interactions"],"prefix":"10.1007","author":[{"given":"Guido","family":"Vallarino","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Lucia","family":"Schiatti","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Matteo","family":"Moro","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yen-Ling","family":"Kuo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mengmi","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Monica","family":"Gori","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Boris","family":"Katz","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Andrei","family":"Barbu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alessio","family":"Del Bue","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"7_CR1","unstructured":"https:\/\/github.com\/open-mmlab\/mmaction2"},{"key":"7_CR2","doi-asserted-by":"publisher","first-page":"99","DOI":"10.1016\/j.patrec.2019.01.019","volume":"137","author":"A Barbu","year":"2020","unstructured":"Barbu, A., Banda, D., Katz, B.: Deep video-to-video transformations for accessibility with an application to photosensitivity. Pattern Recogn. Lett. 137, 99\u2013107 (2020)","journal-title":"Pattern Recogn. Lett."},{"key":"7_CR3","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"331","DOI":"10.1007\/978-3-030-11012-3_27","volume-title":"Computer Vision \u2013 ECCV 2018 Workshops","author":"G Boccignone","year":"2019","unstructured":"Boccignone, G., Cuculo, V., D\u2019Amelio, A., Grossi, G., Lanzarotti, R.: Give ear to my face: modelling multimodal attention to social interactions. In: Leal-Taix\u00e9, L., Roth, S. (eds.) ECCV 2018. LNCS, vol. 11130, pp. 331\u2013345. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-11012-3_27"},{"key":"7_CR4","unstructured":"Bylinskii, Z., et al.: MIT saliency benchmark (2015)"},{"issue":"3","key":"7_CR5","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1109\/TPAMI.2018.2815601","volume":"41","author":"Z Bylinskii","year":"2018","unstructured":"Bylinskii, Z., Judd, T., Oliva, A., Torralba, A., Durand, F.: What do different evaluation metrics tell us about saliency models? IEEE Trans. Pattern Anal. Mach. Intell. 41(3), 740\u2013757 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"7_CR6","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"7_CR7","unstructured":"Cartella, G., et al.: Trends, applications, and challenges in human attention modelling. In: IJCAI (2024)"},{"key":"7_CR8","first-page":"9432","volume":"35","author":"T Fel","year":"2022","unstructured":"Fel, T., Felipe, I., Linsley, D., Serre, T.: Harmonizing the object recognition strategies of deep neural networks with humans. Adv. Neural. Inf. Process. Syst. 35, 9432 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"7_CR9","first-page":"13890","volume":"33","author":"R Geirhos","year":"2020","unstructured":"Geirhos, R., Meding, K., Wichmann, F.A.: Beyond accuracy: quantifying trial-by-trial behaviour of CNNs and humans by measuring error consistency. Adv. Neural. Inf. Process. Syst. 33, 13890\u201313902 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"issue":"6","key":"7_CR10","doi-asserted-by":"publisher","first-page":"475","DOI":"10.1016\/j.tics.2021.01.006","volume":"25","author":"A Hafri","year":"2021","unstructured":"Hafri, A., Firestone, C.: The perception of relations. Trends Cogn. Sci. 25(6), 475\u2013492 (2021)","journal-title":"Trends Cogn. Sci."},{"key":"7_CR11","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"issue":"1\u20132","key":"7_CR12","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1016\/j.cviu.2004.10.009","volume":"100","author":"T Jost","year":"2005","unstructured":"Jost, T., Ouerhani, N., Von Wartburg, R., M\u00fcri, R., H\u00fcgli, H.: Assessing the contribution of color in visual attention. Comput. Vis. Image Underst. 100(1\u20132), 107\u2013123 (2005)","journal-title":"Comput. Vis. Image Underst."},{"key":"7_CR13","unstructured":"Judd, T., Durand, F., Torralba, A.: A benchmark of computational models of saliency to predict human fixations. In: MIT Technical report (2012)"},{"key":"7_CR14","unstructured":"Kay, W., et\u00a0al.: The kinetics human action video dataset. arXiv preprint arXiv:1705.06950 (2017)"},{"key":"7_CR15","unstructured":"Lee, J., et al.: VisAlign: dataset for measuring the alignment between AI and humans in visual perception. Adv. Neural. Inf. Process. Syst. 36, 77119\u201377148 (2023)"},{"issue":"12","key":"7_CR16","doi-asserted-by":"publisher","first-page":"1165","DOI":"10.1016\/j.tics.2023.09.001","volume":"27","author":"E McMahon","year":"2023","unstructured":"McMahon, E., Isik, L.: Seeing social interactions. Trends Cogn. Sci. 27(12), 1165\u20131179 (2023)","journal-title":"Trends Cogn. Sci."},{"key":"7_CR17","doi-asserted-by":"crossref","unstructured":"Mertens, L.P.: Modeling social cognition and its neurologic deficits with artificial neural networks. In: Proceedings of the 25th International Conference on Multimodal Interaction, pp. 726\u2013730 (2023)","DOI":"10.1145\/3577190.3614232"},{"key":"7_CR18","doi-asserted-by":"crossref","unstructured":"Netanyahu, A., Shu, T., Katz, B., Barbu, A., Tenenbaum, J.B.: Phase: physically-grounded abstract social events for machine social perception. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 845\u2013853 (2021)","DOI":"10.1609\/aaai.v35i1.16167"},{"issue":"8","key":"7_CR19","doi-asserted-by":"publisher","first-page":"2648","DOI":"10.1111\/cogs.12670","volume":"42","author":"JC Peterson","year":"2018","unstructured":"Peterson, J.C., Abbott, J.T., Griffiths, T.L.: Evaluating (and improving) the correspondence between deep neural networks and human representations. Cogn. Sci. 42(8), 2648\u20132669 (2018)","journal-title":"Cogn. Sci."},{"key":"7_CR20","doi-asserted-by":"crossref","unstructured":"Schiatti, L., et al.: Modeling visual impairments with artificial neural networks: a review. In: 2023 IEEE\/CVF International Conference on Computer Vision Workshops (ICCVW), pp. 1979\u20131991 (2023)","DOI":"10.1109\/ICCVW60793.2023.00213"},{"issue":"3","key":"7_CR21","doi-asserted-by":"publisher","first-page":"413","DOI":"10.1016\/j.neuron.2020.07.040","volume":"108","author":"M Schrimpf","year":"2020","unstructured":"Schrimpf, M., Kubilius, J., Lee, M.J., Murty, N.A.R., Ajemian, R., DiCarlo, J.J.: Integrative benchmarking to advance neurally mechanistic models of human intelligence. Neuron 108(3), 413\u2013423 (2020)","journal-title":"Neuron"},{"key":"7_CR22","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1007\/s11263-019-01228-7","volume":"128","author":"RR Selvaraju","year":"2020","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-CAM: visual explanations from deep networks via gradient-based localization. Int. J. Comput. Vision 128, 336\u2013359 (2020)","journal-title":"Int. J. Comput. Vision"},{"key":"7_CR23","doi-asserted-by":"crossref","unstructured":"Szegedy, C., et al.: Going deeper with convolutions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2015)","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"7_CR24","doi-asserted-by":"crossref","unstructured":"Tejwani, R., et al.: Incorporating rich social interactions into MDPs. In: 2022 International Conference on Robotics and Automation (ICRA), pp. 7395\u20137401. IEEE (2022)","DOI":"10.1109\/ICRA46639.2022.9811991"},{"key":"7_CR25","unstructured":"Ullman, T., Baker, C., Macindoe, O., Evans, O., Goodman, N., Tenenbaum, J.: Help or hinder: Bayesian models of social goal inference. Adv. Neural Inf. Process. Syst. 22 (2009)"},{"issue":"1","key":"7_CR26","doi-asserted-by":"publisher","first-page":"3730","DOI":"10.1038\/s41467-018-06217-x","volume":"9","author":"M Zhang","year":"2018","unstructured":"Zhang, M., Feng, J., Ma, K.T., Lim, J.H., Zhao, Q., Kreiman, G.: Finding any waldo with zero-shot invariant and efficient visual search. Nat. Commun. 9(1), 3730 (2018)","journal-title":"Nat. Commun."},{"key":"7_CR27","doi-asserted-by":"crossref","unstructured":"Zhang, M., Tseng, C., Kreiman, G.: Putting visual object recognition in context. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12985\u201312994 (2020)","DOI":"10.1109\/CVPR42600.2020.01300"},{"key":"7_CR28","doi-asserted-by":"crossref","unstructured":"Zhang, R., Isola, P., Efros, A.A., Shechtman, E., Wang, O.: The unreasonable effectiveness of deep features as a perceptual metric. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 586\u2013595 (2018)","DOI":"10.1109\/CVPR.2018.00068"},{"issue":"5","key":"7_CR29","doi-asserted-by":"publisher","first-page":"439","DOI":"10.1007\/s11633-022-1348-x","volume":"19","author":"Q Zhou","year":"2022","unstructured":"Zhou, Q., Du, C., He, H.: Exploring the brain-like properties of deep neural networks: a neural encoding perspective. Mach. Intell. Res. 19(5), 439\u2013455 (2022)","journal-title":"Mach. Intell. Res."}],"container-title":["Lecture Notes in Computer Science","Image Analysis and Processing - ICIAP 2025 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-11317-7_7","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:54:00Z","timestamp":1767322440000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-11317-7_7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032113160","9783032113177"],"references-count":29,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-11317-7_7","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Image Analysis and Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Rome","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iciap2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iciap.org\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}