{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:59:50Z","timestamp":1767322790282,"version":"3.48.0"},"publisher-location":"Cham","reference-count":34,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032113160","type":"print"},{"value":"9783032113177","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-11317-7_47","type":"book-chapter","created":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:56:27Z","timestamp":1767322587000},"page":"581-592","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Unveiling Visual Features in\u00a0Artwork Classification: Towards Explainable Vision Transformers 
in\u00a0the\u00a0Arts"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-7512-7661","authenticated-orcid":false,"given":"Raffaele","family":"Scaringi","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0007-6602-7504","authenticated-orcid":false,"given":"Nicola","family":"Fanelli","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0883-2691","authenticated-orcid":false,"given":"Gennaro","family":"Vessio","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6489-8628","authenticated-orcid":false,"given":"Giovanna","family":"Castellano","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,2]]},"reference":[{"key":"47_CR1","doi-asserted-by":"publisher","first-page":"35479","DOI":"10.1109\/ACCESS.2023.3266093","volume":"11","author":"AB Amjoud","year":"2023","unstructured":"Amjoud, A.B., Amrouch, M.: Object detection using deep learning, CNNs and vision transformers: a review. IEEE Access 11, 35479\u201335516 (2023)","journal-title":"IEEE Access"},{"key":"47_CR2","doi-asserted-by":"crossref","unstructured":"Aslan, S., Castellano, G., Digeno, V., Migailo, G., Scaringi, R., Vessio, G.: Recognizing the emotions evoked by artworks through visual features and knowledge graph-embeddings. In: International Conference on Image Analysis and Processing, pp. 129\u2013140. Springer (2022)","DOI":"10.1007\/978-3-031-13321-3_12"},{"key":"47_CR3","unstructured":"Bai, S., et\u00a0al.: Qwen2.5-VL Technical Report. arXiv preprint arXiv:2502.13923 (2025)"},{"key":"47_CR4","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2022.108859","volume":"248","author":"G Castellano","year":"2022","unstructured":"Castellano, G., Digeno, V., Sansaro, G., Vessio, G.: Leveraging knowledge graphs and deep learning for automatic art analysis. Knowl.-Based Syst. 
248, 108859 (2022)","journal-title":"Knowl.-Based Syst."},{"key":"47_CR5","doi-asserted-by":"publisher","first-page":"6599","DOI":"10.1007\/s11042-020-09995-z","volume":"80","author":"G Castellano","year":"2021","unstructured":"Castellano, G., Lella, E., Vessio, G.: Visual link retrieval and knowledge discovery in painting datasets. Multimedia Tools Appl 80, 6599\u20136616 (2021)","journal-title":"Multimedia Tools Appl"},{"key":"47_CR6","doi-asserted-by":"crossref","unstructured":"Castellano, G., Scaringi, R., Vessio, G.: Recognizing the style, genre, and emotion of a work of art through visual and knowledge graph embeddings. In: International Conference of the Italian Association for Artificial Intelligence, pp. 427\u2013440. Springer (2023)","DOI":"10.1007\/978-3-031-47546-7_29"},{"issue":"11","key":"47_CR7","doi-asserted-by":"publisher","first-page":"2590","DOI":"10.1007\/s11263-022-01664-y","volume":"130","author":"G Castellano","year":"2022","unstructured":"Castellano, G., Vessio, G.: A deep learning approach to clustering visual arts. Int. J. Comput. Vision 130(11), 2590\u20132605 (2022)","journal-title":"Int. J. Comput. Vision"},{"key":"47_CR8","doi-asserted-by":"publisher","first-page":"73694","DOI":"10.1109\/ACCESS.2019.2921101","volume":"7","author":"E Cetinic","year":"2019","unstructured":"Cetinic, E., Lipic, T., Grgic, S.: A deep learning perspective on beauty, sentiment, and remembrance of art. IEEE access 7, 73694\u201373710 (2019)","journal-title":"IEEE access"},{"key":"47_CR9","doi-asserted-by":"crossref","unstructured":"Cetinic, E., She, J.: Understanding and creating art with AI: Review and outlook. ACM Trans. Multimedia Comput. Commun. Appl. (TOMM) 18(2), 1\u201322 (2022)","DOI":"10.1145\/3475799"},{"key":"47_CR10","doi-asserted-by":"crossref","unstructured":"Chen, C.F.R., Fan, Q., Panda, R.: Crossvit: Cross-attention multi-scale vision transformer for image classification. 
In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 357\u2013366 (2021)","DOI":"10.1109\/ICCV48922.2021.00041"},{"key":"47_CR11","doi-asserted-by":"crossref","unstructured":"Chen, L., Yang, J.: Recognizing the style of visual arts via adaptive cross-layer correlation. In: Proceedings of the 27th ACM International Conference on Multimedia, pp. 2459\u20132467 (2019)","DOI":"10.1145\/3343031.3350977"},{"key":"47_CR12","doi-asserted-by":"crossref","unstructured":"Dani, M., Rio-Torto, I., Alaniz, S., Akata, Z.: DeViL: decoding vision features into language. In: DAGM German Conference on Pattern Recognition, pp. 363\u2013377. Springer (2023)","DOI":"10.1007\/978-3-031-54605-1_24"},{"key":"47_CR13","doi-asserted-by":"crossref","unstructured":"Fanelli, N., Vessio, G., Castellano, G.: I dream my painting: Connecting MLLMs and diffusion models via prompt generation for text-guided multi-mask inpainting. In: 2025 IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV), pp. 6073\u20136082. IEEE (2025)","DOI":"10.1109\/WACV61041.2025.00592"},{"issue":"1","key":"47_CR14","doi-asserted-by":"publisher","first-page":"17","DOI":"10.1007\/s13735-019-00189-4","volume":"9","author":"N Garcia","year":"2020","unstructured":"Garcia, N., Renoust, B., Nakashima, Y.: ContextNet: representation and exploration for painting classification and retrieval in context. Int. J. Multimedia Inf. Retrieval 9(1), 17\u201330 (2020)","journal-title":"Int. J. Multimedia Inf. Retrieval"},{"key":"47_CR15","unstructured":"Ghiasi, A., et\u00a0al.: What do vision transformers learn? A visual exploration. arXiv preprint arXiv:2212.06727 (2022)"},{"key":"47_CR16","doi-asserted-by":"crossref","unstructured":"Grover, A., Leskovec, J.: node2vec: Scalable feature learning for networks. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, pp. 
855\u2013864 (2016)","DOI":"10.1145\/2939672.2939754"},{"issue":"1","key":"47_CR17","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1109\/TPAMI.2022.3152247","volume":"45","author":"K Han","year":"2022","unstructured":"Han, K., et al.: A survey on vision transformer. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 87\u2013110 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"47_CR18","unstructured":"Karayev, S., et al.: Recognizing image style. arXiv preprint arXiv:1311.3715 (2013)"},{"key":"47_CR19","unstructured":"Kazemi, H., Chegini, A., Geiping, J., Feizi, S., Goldstein, T.: What do we learn from inverting CLIP models? arXiv preprint arXiv:2403.02580 (2024)"},{"key":"47_CR20","doi-asserted-by":"crossref","unstructured":"Kendall, A., Gal, Y., Cipolla, R.: Multi-task learning using uncertainty to weigh losses for scene geometry and semantics. In: Proceedings of the IEEE Conference on Computer Vision and Pattern recognition, pp. 7482\u20137491 (2018)","DOI":"10.1109\/CVPR.2018.00781"},{"key":"47_CR21","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"47_CR22","doi-asserted-by":"crossref","unstructured":"Li, X., et al.: OMG-Seg: Is one model good enough for all segmentation? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 27948\u201327959 (2024)","DOI":"10.1109\/CVPR52733.2024.02640"},{"issue":"12","key":"47_CR23","doi-asserted-by":"publisher","first-page":"6999","DOI":"10.1109\/TNNLS.2021.3084827","volume":"33","author":"Z Li","year":"2021","unstructured":"Li, Z., Liu, F., Yang, W., Peng, S., Zhou, J.: A survey of convolutional neural networks: analysis, applications, and prospects. IEEE Trans. Neural Netw. Learn. Syst. 33(12), 6999\u20137019 (2021)","journal-title":"IEEE Trans. Neural Netw. Learn. 
Syst."},{"key":"47_CR24","unstructured":"Loshchilov, I., Hutter, F.: SGDR: Stochastic gradient descent with warm restarts. arXiv preprint arXiv:1608.03983 (2016)"},{"key":"47_CR25","unstructured":"Loshchilov, I., Hutter, F.: Decoupled weight decay regularization. arXiv preprint arXiv:1711.05101 (2017)"},{"key":"47_CR26","doi-asserted-by":"crossref","unstructured":"Mahendran, A., Vedaldi, A.: Understanding deep image representations by inverting them. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5188\u20135196 (2015)","DOI":"10.1109\/CVPR.2015.7299155"},{"key":"47_CR27","unstructured":"Natarajan, P., Nambiar, A.: VALE: A multimodal visual and language explanation framework for image classifiers using explainable AI and language models. arXiv preprint arXiv:2408.12808 (2024)"},{"issue":"11","key":"47_CR28","volume":"2","author":"C Olah","year":"2017","unstructured":"Olah, C., Mordvintsev, A., Schubert, L.: Feature visualization. Distill 2(11), e7 (2017)","journal-title":"Distill"},{"key":"47_CR29","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"47_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.knosys.2024.112857","volume":"310","author":"R Scaringi","year":"2025","unstructured":"Scaringi, R., Fiameni, G., Vessio, G., Castellano, G.: GraphCLIP: Image-graph contrastive learning for multimodal artwork classification. Knowl.-Based Syst. 310, 112857 (2025)","journal-title":"Knowl.-Based Syst."},{"key":"47_CR31","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-CAM: Visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 
618\u2013626 (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"47_CR32","doi-asserted-by":"crossref","unstructured":"Wang, Z., Zhao, L., Xing, W.: Stylediffusion: Controllable disentangled style transfer via diffusion models. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7677\u20137689 (2023)","DOI":"10.1109\/ICCV51070.2023.00706"},{"key":"47_CR33","doi-asserted-by":"crossref","unstructured":"Wu, J., Gan, W., Chen, Z., Wan, S., Yu, P.S.: Multimodal large language models: a survey. In: 2023 IEEE International Conference on Big Data (BigData), pp. 2247\u20132256. IEEE (2023)","DOI":"10.1109\/BigData59044.2023.10386743"},{"key":"47_CR34","doi-asserted-by":"crossref","unstructured":"Xiao, Y., Yuan, Q., Jiang, K., He, J., Lin, C.W., Zhang, L.: TTST: A top-k token selective transformer for remote sensing image super-resolution. IEEE Trans. Image Process. (2024)","DOI":"10.1109\/TIP.2023.3349004"}],"container-title":["Lecture Notes in Computer Science","Image Analysis and Processing - ICIAP 2025 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-11317-7_47","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,2]],"date-time":"2026-01-02T02:56:30Z","timestamp":1767322590000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-11317-7_47"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9783032113160","9783032113177"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-11317-7_47","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter 
History"}},{"value":"The authors declare that they have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"ICIAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Image Analysis and Processing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Rome","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 September 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"19 September 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"23","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"iciap2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.iciap.org\/home","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}