{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,16]],"date-time":"2026-06-16T04:44:26Z","timestamp":1781585066517,"version":"3.54.5"},"publisher-location":"Cham","reference-count":69,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031915710","type":"print"},{"value":"9783031915727","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91572-7_8","type":"book-chapter","created":{"date-parts":[[2025,5,23]],"date-time":"2025-05-23T04:15:14Z","timestamp":1747973714000},"page":"118-136","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Context-Infused Visual Grounding for\u00a0Art"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-6443-8250","authenticated-orcid":false,"given":"Selina","family":"Khan","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5145-3603","authenticated-orcid":false,"given":"Nanne","family":"van Noord","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"8_CR1","unstructured":"Paintings dataset (2014). https:\/\/www.robots.ox.ac.uk\/~vgg\/data\/paintings\/"},{"key":"8_CR2","unstructured":"Caspapaintings dataset (2018). https:\/\/people.cs.pitt.edu\/~chris\/artistic_objects\/"},{"key":"8_CR3","unstructured":"Artistic faces dataset (2019). https:\/\/faculty.runi.ac.il\/arik\/site\/foa\/artistic-faces-dataset.asp"},{"key":"8_CR4","unstructured":"Bartz, C., Jain, N., Krestel, R.: Automatic matching of paintings and descriptions in art-historic archives using multimodal analysis. In: Proceedings of the 1st International Workshop on Artificial Intelligence for Historical Image Enrichment and Access, pp. 23\u201328 (2020)"},{"key":"8_CR5","doi-asserted-by":"crossref","unstructured":"Bengamra, S., Mzoughi, O., Bigand, A., Zagrouba, E.: New challenges of face detection in paintings based on deep learning. In: Proceedings of the 16th International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications-Volume 4: VISAPP, pp. 311\u2013320 (2021)","DOI":"10.5220\/0010243703110320"},{"key":"8_CR6","doi-asserted-by":"crossref","unstructured":"Bengamra, S., Mzoughi, O., Bigand, A., Zagrouba, E.: A comprehensive survey on object detection in visual art: taxonomy and challenge. Multimedia Tools Appl. 1\u201334 (2023)","DOI":"10.1007\/s11042-023-15968-9"},{"key":"8_CR7","doi-asserted-by":"crossref","unstructured":"Brate, R., Nesterov, A., Vogelmann, V., Van\u00a0Ossenbruggen, J., Hollink, L., Van\u00a0Erp, M.: Capturing contentiousness: constructing the contentious terms in context corpus. In: Proceedings of the 11th Knowledge Capture Conference, pp. 17\u201324 (2021)","DOI":"10.1145\/3460210.3493553"},{"key":"8_CR8","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"502","DOI":"10.1007\/978-3-030-68796-0_36","volume-title":"Pattern Recognition. ICPR International Workshops and Challenges","author":"E Cetinic","year":"2021","unstructured":"Cetinic, E.: Iconographic image captioning for artworks. In: del Bimbo, A., et al. (eds.) ICPR 2021. LNCS, vol. 12663, pp. 502\u2013516. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-68796-0_36"},{"key":"8_CR9","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1016\/j.eswa.2018.07.026","volume":"114","author":"E Cetinic","year":"2018","unstructured":"Cetinic, E., Lipic, T., Grgic, S.: Fine-tuning convolutional neural networks for fine art classification. Expert Syst. Appl. 114, 107\u2013118 (2018)","journal-title":"Expert Syst. Appl."},{"key":"8_CR10","doi-asserted-by":"crossref","unstructured":"Crowley, E.J., Zisserman, A.: In search of art. In: Computer Vision-ECCV 2014 Workshops: Zurich, Switzerland, September 6\u20137 and 12, 2014, Proceedings, Part I 13, pp. 54\u201370. Springer (2015)","DOI":"10.1007\/978-3-319-16178-5_4"},{"key":"8_CR11","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"721","DOI":"10.1007\/978-3-319-46604-0_50","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"EJ Crowley","year":"2016","unstructured":"Crowley, E.J., Zisserman, A.: The art of detection. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9913, pp. 721\u2013737. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46604-0_50"},{"key":"8_CR12","doi-asserted-by":"crossref","unstructured":"Deng, C., Wu, Q., Wu, Q., Hu, F., Lyu, F., Tan, M.: Visual grounding via accumulated attention. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00808"},{"key":"8_CR13","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: Bert: pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)"},{"key":"8_CR14","doi-asserted-by":"crossref","unstructured":"Dhamija, A., Gunther, M., Ventura, J., Boult, T.: The overlooked elephant of object detection: open set. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1021\u20131030 (2020)","DOI":"10.1109\/WACV45572.2020.9093355"},{"issue":"2","key":"8_CR15","doi-asserted-by":"publisher","first-page":"50","DOI":"10.26833\/ijeg.378257","volume":"3","author":"Y Do\u011fan","year":"2018","unstructured":"Do\u011fan, Y., Yakar, M.: Gis and three-dimensional modeling for cultural heritages. Int. J. Eng. Geosci. 3(2), 50\u201355 (2018)","journal-title":"Int. J. Eng. Geosci."},{"key":"8_CR16","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Gool, L.V., Williams, C., Winn, J., Zisserman, A.: The pascal visual object classes (VOC) challenge. Int. J. Comput. Vis. 88, 303\u2013338 (2010). https:\/\/doi.org\/10.1007\/s11263-009-0275-4","journal-title":"Int. J. Comput. Vis."},{"key":"8_CR17","doi-asserted-by":"crossref","unstructured":"Foka, A.F.: Computer vision applications for art history: reflections and paradigms for future research. In: Proceedings of EVA London 2021, pp. 73\u201380. BCS Learning & Development (2021)","DOI":"10.14236\/ewic\/EVA2021.12"},{"key":"8_CR18","doi-asserted-by":"crossref","unstructured":"Garcia, N., Renoust, B., Nakashima, Y.: Context-aware embeddings for automatic art analysis. In: Proceedings of the 2019 on International Conference on Multimedia Retrieval, pp. 25\u201333 (2019)","DOI":"10.1145\/3323873.3325028"},{"key":"8_CR19","doi-asserted-by":"crossref","unstructured":"Garcia, N., Vogiatzis, G.: How to read paintings: semantic art understanding with multi-modal retrieval. In: Proceedings of the European Conference in Computer Vision Workshops (2018)","DOI":"10.1007\/978-3-030-11012-3_52"},{"key":"8_CR20","doi-asserted-by":"crossref","unstructured":"Gonthier, N., Gousseau, Y., Ladjal, S., Bonfait, O.: Weakly supervised object detection in artworks. In: Proceedings of the European Conference on Computer Vision (ECCV) Workshops, pp.\u00a00\u20130 (2018)","DOI":"10.1007\/978-3-030-11012-3_53"},{"key":"8_CR21","doi-asserted-by":"publisher","first-page":"103299","DOI":"10.1016\/j.cviu.2021.103299","volume":"214","author":"N Gonthier","year":"2022","unstructured":"Gonthier, N., Ladjal, S., Gousseau, Y.: Multiple instance learning on deep features for weakly supervised object detection with extreme domain shifts. Comput. Vis. Image Underst. 214, 103299 (2022)","journal-title":"Comput. Vis. Image Underst."},{"key":"8_CR22","unstructured":"Gu, X., Lin, T.Y., Kuo, W., Cui, Y.: Open-vocabulary object detection via vision and language knowledge distillation (2022)"},{"key":"8_CR23","doi-asserted-by":"crossref","unstructured":"Gupta, T., Shih, K., Singh, S., Hoiem, D.: Aligned image-word representations improve inductive transfer across vision-language tasks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4213\u20134222 (2017)","DOI":"10.1109\/ICCV.2017.452"},{"key":"8_CR24","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"59","DOI":"10.1007\/978-3-030-58607-2_4","volume-title":"Computer Vision \u2013 ECCV 2020","author":"T Hui","year":"2020","unstructured":"Hui, T., et al.: Linguistic structure guided context modeling for referring image segmentation. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12355, pp. 59\u201375. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58607-2_4"},{"key":"8_CR25","doi-asserted-by":"crossref","unstructured":"Inoue, N., Furuta, R., Yamasaki, T., Aizawa, K.: Cross-domain weakly-supervised object detection through progressive domain adaptation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2018)","DOI":"10.1109\/CVPR.2018.00525"},{"key":"8_CR26","doi-asserted-by":"crossref","unstructured":"Jeon, H.J., Jung, S., Choi, Y.S., Kim, J.W., Kim, J.S.: Object detection in artworks using data augmentation. In: 2020 International Conference on Information and Communication Technology Convergence (ICTC), pp. 1312\u20131314. IEEE (2020)","DOI":"10.1109\/ICTC49870.2020.9289321"},{"key":"8_CR27","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International Conference on Machine Learning, pp. 4904\u20134916. PMLR (2021)"},{"key":"8_CR28","doi-asserted-by":"crossref","unstructured":"Kadish, D., Risi, S., L\u00f8vlie, A.S.: Improving object detection in art images using only style transfer. In: 2021 International Joint Conference on Neural Networks (IJCNN), pp.\u00a01\u20138. IEEE (2021)","DOI":"10.1109\/IJCNN52387.2021.9534264"},{"key":"8_CR29","doi-asserted-by":"crossref","unstructured":"Kamath, A., Singh, M., LeCun, Y., Synnaeve, G., Misra, I., Carion, N.: Mdetr-modulated detection for end-to-end multi-modal understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1780\u20131790 (2021)","DOI":"10.1109\/ICCV48922.2021.00180"},{"key":"8_CR30","doi-asserted-by":"crossref","unstructured":"Kazemzadeh, S., Ordonez, V., Matten, M., Berg, T.: Referitgame: referring to objects in photographs of natural scenes. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 787\u2013798 (2014)","DOI":"10.3115\/v1\/D14-1086"},{"key":"8_CR31","doi-asserted-by":"crossref","unstructured":"Khan, S., van Noord, N.: Stylistic multi-task analysis of ukiyo-e woodblock prints (2021)","DOI":"10.5244\/C.35.410"},{"key":"8_CR32","doi-asserted-by":"crossref","unstructured":"Li, L.H., et al.: Grounded language-image pre-training (2022)","DOI":"10.1109\/CVPR52688.2022.01069"},{"key":"8_CR33","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"8_CR34","doi-asserted-by":"crossref","unstructured":"Liu, R., Liu, C., Bai, Y., Yuille, A.L.: Clevr-ref+: diagnosing visual reasoning with referring expressions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4185\u20134194 (2019)","DOI":"10.1109\/CVPR.2019.00431"},{"key":"8_CR35","doi-asserted-by":"crossref","unstructured":"Liu, S., et al.: Grounding dino: marrying dino with grounded pre-training for open-set object detection (2023)","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"8_CR36","doi-asserted-by":"publisher","first-page":"163","DOI":"10.1016\/j.neucom.2022.01.068","volume":"490","author":"Y Lu","year":"2022","unstructured":"Lu, Y., Guo, C., Dai, X., Wang, F.Y.: Data-efficient image captioning of fine art paintings via virtual-real semantic alignment training. Neurocomputing 490, 163\u2013180 (2022)","journal-title":"Neurocomputing"},{"key":"8_CR37","doi-asserted-by":"crossref","unstructured":"Mao, H., Cheung, M., She, J.: Deepart: learning joint representations of visual arts. In: Proceedings of the 25th ACM International Conference on Multimedia, pp. 1183\u20131191 (2017)","DOI":"10.1145\/3123266.3123405"},{"key":"8_CR38","doi-asserted-by":"crossref","unstructured":"Mao, J., Huang, J., Toshev, A., Camburu, O., Yuille, A.L., Murphy, K.: Generation and comprehension of unambiguous object descriptions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 11\u201320 (2016)","DOI":"10.1109\/CVPR.2016.9"},{"key":"8_CR39","doi-asserted-by":"crossref","unstructured":"Mensink, T., Van\u00a0Gemert, J.: The rijksmuseum challenge: museum-centered visual recognition. In: Proceedings of International Conference on Multimedia Retrieval, pp. 451\u2013454 (2014)","DOI":"10.1145\/2578726.2578791"},{"key":"8_CR40","doi-asserted-by":"crossref","unstructured":"Mermet, A., Kitamoto, A., Suzuki, C., Takagishi, A.: Face detection on pre-modern Japanese artworks using R-CNN and image patching for semi-automatic annotation. In: Proceedings of the 2nd Workshop on Structuring and Understanding of Multimedia heritAge Contents, pp. 23\u201331 (2020)","DOI":"10.1145\/3423323.3423412"},{"key":"8_CR41","doi-asserted-by":"publisher","unstructured":"Milani, F., Fraternali, P.: A dataset and a convolutional model for iconography classification in paintings. J. Comput. Cult. Heritage 14(4), 1\u201318 (2021). https:\/\/doi.org\/10.1145\/3458885","DOI":"10.1145\/3458885"},{"issue":"8","key":"8_CR42","doi-asserted-by":"publisher","first-page":"215","DOI":"10.3390\/jimaging8080215","volume":"8","author":"F Milani","year":"2022","unstructured":"Milani, F., Pinciroli Vago, N.O., Fraternali, P.: Proposals generation for weakly supervised object detection in artwork images. J. Imaging 8(8), 215 (2022)","journal-title":"J. Imaging"},{"issue":"4","key":"8_CR43","doi-asserted-by":"publisher","first-page":"339","DOI":"10.1080\/01973762.2021.1928864","volume":"36","author":"A N\u00e4slund Dahlgren","year":"2020","unstructured":"N\u00e4slund Dahlgren, A., Wasielewski, A.: Cultures of digitization: a historiographic perspective on digital art history. Vis. Resour. 36(4), 339\u2013359 (2020)","journal-title":"Vis. Resour."},{"key":"8_CR44","unstructured":"OpenAI: Gpt-4 technical report (2024)"},{"key":"8_CR45","doi-asserted-by":"publisher","first-page":"104098","DOI":"10.1016\/j.imavis.2021.104098","volume":"107","author":"G Pasqualino","year":"2021","unstructured":"Pasqualino, G., Furnari, A., Signorello, G., Farinella, G.M.: An unsupervised domain adaptation scheme for single-stage artwork recognition in cultural sites. Image Vis. Comput. 107, 104098 (2021)","journal-title":"Image Vis. Comput."},{"key":"8_CR46","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"8_CR47","unstructured":"Raffel, C., et al.: Exploring the limits of transfer learning with a unified text-to-text transformer (2023)"},{"key":"8_CR48","doi-asserted-by":"crossref","unstructured":"Reshetnikov, A., Marinescu, M.C., Lopez, J.M.: Deart: dataset of European art (2022)","DOI":"10.1007\/978-3-031-25056-9_15"},{"key":"8_CR49","doi-asserted-by":"publisher","unstructured":"Reshetnikov, A., et al.: Image descriptions for 7471 of the DEArt images, obtained via the zooniverse crowdsourcing platform (2023). https:\/\/doi.org\/10.5281\/zenodo.7575290","DOI":"10.5281\/zenodo.7575290"},{"key":"8_CR50","doi-asserted-by":"publisher","unstructured":"Robot, O.D.: Moma collection - automatic update (2024). https:\/\/doi.org\/10.5281\/zenodo.10781117","DOI":"10.5281\/zenodo.10781117"},{"key":"8_CR51","unstructured":"Saleh, B., Elgammal, A.: Large-scale classification of fine-art paintings: learning the right metric on the right feature. arXiv preprint arXiv:1505.00855 (2015)"},{"key":"8_CR52","doi-asserted-by":"crossref","unstructured":"Song, Y., Zhang, R., Chen, Z., Wan, X., Li, G.: Advancing visual grounding with scene knowledge: Benchmark and method. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 15039\u201315049 (2023)","DOI":"10.1109\/CVPR52729.2023.01444"},{"key":"8_CR53","unstructured":"Strezoski, G., Worring, M.: OmniArt: multi-task deep learning for artistic data analysis. arXiv preprint arXiv:1708.00684 (2017)"},{"key":"8_CR54","doi-asserted-by":"crossref","unstructured":"Tan, W.R., Chan, C.S., Aguirre, H.E., Tanaka, K.: Ceci n\u2019est pas une pipe: a deep convolutional network for fine-art paintings classification. In: 2016 IEEE International Conference on Image Processing (ICIP), pp. 3703\u20133707. IEEE (2016)","DOI":"10.1109\/ICIP.2016.7533051"},{"key":"8_CR55","unstructured":"Tian, Y., Suzuki, C., Clanuwat, T., Bober-Irizar, M., Lamb, A., Kitamoto, A.: Kaokore: a pre-modern Japanese art facial expression dataset. arXiv preprint arXiv:2002.08595 (2020)"},{"issue":"1\u20132","key":"8_CR56","doi-asserted-by":"publisher","first-page":"72","DOI":"10.1080\/13500775.2019.1638031","volume":"71","author":"S Vawda","year":"2019","unstructured":"Vawda, S.: Museums and the epistemology of injustice: from colonialism to decoloniality. Museum Int. 71(1\u20132), 72\u201379 (2019)","journal-title":"Museum Int."},{"key":"8_CR57","doi-asserted-by":"crossref","unstructured":"Wang, L., Li, Y., Lazebnik, S.: Learning deep structure-preserving image-text embeddings. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5005\u20135013 (2016)","DOI":"10.1109\/CVPR.2016.541"},{"key":"8_CR58","doi-asserted-by":"crossref","unstructured":"Wei, B., et al.: End-to-end transformer-based open-vocabulary keyword spotting with location-guided local attention. In: Interspeech, pp. 361\u2013365 (2021)","DOI":"10.21437\/Interspeech.2021-1335"},{"key":"8_CR59","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"825","DOI":"10.1007\/978-3-319-46604-0_57","volume-title":"Computer Vision \u2013 ECCV 2016 Workshops","author":"N Westlake","year":"2016","unstructured":"Westlake, N., Cai, H., Hall, P.: Detecting people in artwork with CNNs. In: Hua, G., J\u00e9gou, H. (eds.) ECCV 2016. LNCS, vol. 9913, pp. 825\u2013841. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46604-0_57"},{"key":"8_CR60","doi-asserted-by":"crossref","unstructured":"Westlake, N., Cai, H., Hall, P.: Detecting people in artwork with CNNs. In: Computer Vision\u2013ECCV 2016 Workshops: Amsterdam, The Netherlands, October 8\u201310 and 15\u201316, 2016, Proceedings, Part I 14, pp. 825\u2013841. Springer (2016)","DOI":"10.1007\/978-3-319-46604-0_57"},{"key":"8_CR61","doi-asserted-by":"crossref","unstructured":"Wu, H., et al.: Unified visual-semantic embeddings: bridging vision and language with structured meaning representations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6609\u20136618 (2019)","DOI":"10.1109\/CVPR.2019.00677"},{"key":"8_CR62","doi-asserted-by":"crossref","unstructured":"Yang, L., Xu, Y., Yuan, C., Liu, W., Li, B., Hu, W.: Improving visual grounding with visual-linguistic verification and iterative reasoning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9499\u20139508 (2022)","DOI":"10.1109\/CVPR52688.2022.00928"},{"key":"8_CR63","doi-asserted-by":"crossref","unstructured":"Yang, S., Li, G., Yu, Y.: Graph-structured referring expression reasoning in the wild. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9952\u20139961 (2020)","DOI":"10.1109\/CVPR42600.2020.00997"},{"key":"8_CR64","doi-asserted-by":"crossref","unstructured":"Yaniv, J., Newman, Y., Shamir, A.: The face of art: landmark detection and geometric style in portraits. ACM Trans. Graph. (TOG) 38(4), 1\u201315 (2019)","DOI":"10.1145\/3306346.3322984"},{"key":"8_CR65","doi-asserted-by":"crossref","unstructured":"Yao, L., et al.: Detclip: dictionary-enriched visual-concept paralleled pre-training for open-world detection. In: Advances in Neural Information Processing Systems 35, 9125\u20139138 (2022)","DOI":"10.52202\/068431-0663"},{"key":"8_CR66","unstructured":"Yi, J.S.K., Kim, Y., Chernova, S.: Incremental object grounding using scene graphs. arXiv preprint arXiv:2201.01901 (2022)"},{"key":"8_CR67","doi-asserted-by":"crossref","unstructured":"Yin, R., Monson, E., Honig, E., Daubechies, I., Maggioni, M.: Object recognition in art drawings: transfer of a neural network. In: 2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pp. 2299\u20132303. IEEE (2016)","DOI":"10.1109\/ICASSP.2016.7472087"},{"key":"8_CR68","doi-asserted-by":"crossref","unstructured":"Yu, L., Poirson, P., Yang, S., Berg, A.C., Berg, T.L.: Modeling context in referring expressions. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part II 14, pp. 69\u201385. Springer (2016)","DOI":"10.1007\/978-3-319-46475-6_5"},{"key":"8_CR69","doi-asserted-by":"crossref","unstructured":"Zhou, Z.H.: A brief introduction to weakly supervised learning. Natl. Sci. Rev. 5, 44\u201353 (2018)","DOI":"10.1093\/nsr\/nwx106"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91572-7_8","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,21]],"date-time":"2026-05-21T00:53:43Z","timestamp":1779324823000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91572-7_8"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031915710","9783031915727"],"references-count":69,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91572-7_8","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}