{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T07:59:50Z","timestamp":1771919990298,"version":"3.50.1"},"publisher-location":"Cham","reference-count":58,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031728471","type":"print"},{"value":"9783031728488","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,29]],"date-time":"2024-11-29T00:00:00Z","timestamp":1732838400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72848-8_26","type":"book-chapter","created":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T13:37:10Z","timestamp":1732801030000},"page":"444-461","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["CIC-BART-SSA: Controllable Image Captioning with\u00a0Structured Semantic Augmentation"],"prefix":"10.1007","author":[{"given":"Kalliopi","family":"Basioti","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mohamed A.","family":"Abdelsalam","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Federico","family":"Fancellu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Vladimir","family":"Pavlovic","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Afsaneh","family":"Fazly","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,11,29]]},"reference":[{"key":"26_CR1","doi-asserted-by":"crossref","unstructured":"Abdelsalam, M.A., et al.: Visual semantic parsing: from images to abstract meaning representation. In: Proceedings of the 26th Conference on Computational Natural Language Learning (CoNLL), pp. 282\u2013300 (2022)","DOI":"10.18653\/v1\/2022.conll-1.19"},{"key":"26_CR2","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"382","DOI":"10.1007\/978-3-319-46454-1_24","volume-title":"Computer Vision \u2013 ECCV 2016","author":"P Anderson","year":"2016","unstructured":"Anderson, P., Fernando, B., Johnson, M., Gould, S.: SPICE: semantic propositional image caption evaluation. In: Leibe, B., Matas, J., Sebe, N., Welling, M. (eds.) ECCV 2016. LNCS, vol. 9909, pp. 382\u2013398. Springer, Cham (2016). https:\/\/doi.org\/10.1007\/978-3-319-46454-1_24"},{"key":"26_CR3","doi-asserted-by":"crossref","unstructured":"Anderson, P., et al.: Bottom-up and top-down attention for image captioning and visual question answering. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00636"},{"key":"26_CR4","doi-asserted-by":"crossref","unstructured":"Aneja, J., Agrawal, H., Batra, D., Schwing, A.: Sequential latent spaces for modeling the intention during diverse image captioning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 4261\u20134270 (2019)","DOI":"10.1109\/ICCV.2019.00436"},{"key":"26_CR5","unstructured":"Astudillo, R.F., Ballesteros, M., Naseem, T., Blodgett, A., Florian, R.: Transition-based parsing with stack-transformers. arXiv preprint arXiv:2010.10669 (2020)"},{"key":"26_CR6","doi-asserted-by":"crossref","unstructured":"Banarescu, L., et al.: Abstract meaning representation (AMR) 1.0 specification. In: Parsing on Freebase from Question-Answer Pairs. In Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing. Seattle: ACL, pp. 1533\u20131544 (2012)","DOI":"10.18653\/v1\/D13-1160"},{"key":"26_CR7","unstructured":"Banarescu, L., et al.: Abstract meaning representation for sembanking. In: Proceedings of the 7th Linguistic Annotation Workshop and Interoperability with Discourse, pp. 178\u2013186 (2013)"},{"key":"26_CR8","doi-asserted-by":"crossref","unstructured":"Bevilacqua, M., Blloshmi, R., Navigli, R.: One spring to rule them both: symmetric AMR semantic parsing and generation without a complex pipeline. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a035, pp. 12564\u201312573 (2021)","DOI":"10.1609\/aaai.v35i14.17489"},{"key":"26_CR9","doi-asserted-by":"crossref","unstructured":"Blloshmi, R., Bevilacqua, M., Fabiano, E., Caruso, V., Navigli, R.: Spring goes online: end-to-end AMR parsing and generation. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 134\u2013142 (2021)","DOI":"10.18653\/v1\/2021.emnlp-demo.16"},{"key":"26_CR10","unstructured":"Cai, S., Knight, K.: Smatch: an evaluation metric for semantic feature structures. In: Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers), pp. 748\u2013752 (2013)"},{"issue":"1","key":"26_CR11","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TPAMI.2021.3137605","volume":"45","author":"X Chang","year":"2021","unstructured":"Chang, X., Ren, P., Xu, P., Li, Z., Chen, X., Hauptmann, A.: A comprehensive survey of scene graphs: generation and application. IEEE Trans. Pattern Anal. Mach. Intell. 45(1), 1\u201326 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"26_CR12","doi-asserted-by":"crossref","unstructured":"Chen, L., Jiang, Z., Xiao, J., Liu, W.: Human-like controllable image captioning with verb-specific semantic roles. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16846\u201316856 (2021)","DOI":"10.1109\/CVPR46437.2021.01657"},{"key":"26_CR13","doi-asserted-by":"crossref","unstructured":"Chen, S., Jin, Q., Wang, P., Wu, Q.: Say as you wish: fine-grained control of image caption generation with abstract scene graphs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9962\u20139971 (2020)","DOI":"10.1109\/CVPR42600.2020.00998"},{"key":"26_CR14","unstructured":"Cho, J., Lei, J., Tan, H., Bansal, M.: Unifying vision-and-language tasks via text generation. In: International Conference on Machine Learning, pp. 1931\u20131942. PMLR (2021)"},{"key":"26_CR15","doi-asserted-by":"crossref","unstructured":"Choi, W.S., Heo, Y.J., Punithan, D., Zhang, B.T.: Scene graph parsing via abstract meaning representation in pre-trained language models. In: NAACL 2022 Workshop on Deep Learning on Graphs for Natural Language Processing (2022)","DOI":"10.18653\/v1\/2022.dlg4nlp-1.4"},{"key":"26_CR16","unstructured":"Choi, W.S., Heo, Y.J., Zhang, B.T.: Sgram: improving scene graph parsing via abstract meaning representation. arXiv preprint arXiv:2210.08675 (2022)"},{"key":"26_CR17","unstructured":"Chunseong\u00a0Park, C., Kim, B., Kim, G.: Attend to you: personalized image captioning with context sequence memory networks. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 895\u2013903 (2017)"},{"key":"26_CR18","doi-asserted-by":"crossref","unstructured":"Cornia, M., Baraldi, L., Cucchiara, R.: Show, control and tell: a framework for generating controllable and grounded captions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8307\u20138316 (2019)","DOI":"10.1109\/CVPR.2019.00850"},{"key":"26_CR19","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"712","DOI":"10.1007\/978-3-030-58601-0_42","volume-title":"Computer Vision \u2013 ECCV 2020","author":"C Deng","year":"2020","unstructured":"Deng, C., Ding, N., Tan, M., Wu, Q.: Length-controllable image captioning. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12358, pp. 712\u2013729. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58601-0_42"},{"key":"26_CR20","doi-asserted-by":"crossref","unstructured":"Gan, C., Gan, Z., He, X., Gao, J., Deng, L.: Stylenet: generating attractive visual captions with styles. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3137\u20133146 (2017)","DOI":"10.1109\/CVPR.2017.108"},{"key":"26_CR21","doi-asserted-by":"crossref","unstructured":"Guo, L., Liu, J., Yao, P., Li, J., Lu, H.: MSCap: multi-style image captioning with unpaired stylized text. In: 2019 IEEE CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 4199\u20134208 (2019)","DOI":"10.1109\/CVPR.2019.00433"},{"key":"26_CR22","unstructured":"Hirsch, E., Tal, A.: Clid: controlled-length image descriptions with limited data. arXiv preprint arXiv:2211.14835 (2022)"},{"key":"26_CR23","doi-asserted-by":"crossref","unstructured":"Johnson, J., Karpathy, A., Fei-Fei, L.: Densecap: fully convolutional localization networks for dense captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4565\u20134574 (2016)","DOI":"10.1109\/CVPR.2016.494"},{"key":"26_CR24","doi-asserted-by":"crossref","unstructured":"Johnson, J., et al.: Image retrieval using scene graphs. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3668\u20133678 (2015)","DOI":"10.1109\/CVPR.2015.7298990"},{"key":"26_CR25","doi-asserted-by":"crossref","unstructured":"Karpathy, A., Fei-Fei, L.: Deep visual-semantic alignments for generating image descriptions. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 3128\u20133137 (2015)","DOI":"10.1109\/CVPR.2015.7298932"},{"key":"26_CR26","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"121","DOI":"10.1007\/978-3-030-58577-8_8","volume-title":"Computer Vision \u2013 ECCV 2020","author":"X Li","year":"2020","unstructured":"Li, X., et al.: Oscar: object-semantics aligned pre-training for vision-language tasks. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12375, pp. 121\u2013137. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58577-8_8"},{"key":"26_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume-title":"Computer Vision \u2013 ECCV 2014","author":"T-Y Lin","year":"2014","unstructured":"Lin, T.-Y., et al.: Microsoft COCO: common objects in context. In: Fleet, D., Pajdla, T., Schiele, B., Tuytelaars, T. (eds.) ECCV 2014. LNCS, vol. 8693, pp. 740\u2013755. Springer, Cham (2014). https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48"},{"key":"26_CR28","doi-asserted-by":"crossref","unstructured":"Lindh, A., Ross, R.J., Kelleher, J.D.: Language-driven region pointer advancement for controllable image captioning. arXiv preprint arXiv:2011.14901 (2020)","DOI":"10.18653\/v1\/2020.coling-main.174"},{"key":"26_CR29","doi-asserted-by":"crossref","unstructured":"Lu, J., Yang, J., Batra, D., Parikh, D.: Neural baby talk. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7219\u20137228 (2018)","DOI":"10.1109\/CVPR.2018.00754"},{"issue":"5\u20136","key":"26_CR30","doi-asserted-by":"publisher","first-page":"365","DOI":"10.1016\/0031-3203(79)90049-9","volume":"11","author":"A Lukasov\u00e1","year":"1979","unstructured":"Lukasov\u00e1, A.: Hierarchical agglomerative clustering procedure. Pattern Recogn. 11(5\u20136), 365\u2013381 (1979)","journal-title":"Pattern Recogn."},{"key":"26_CR31","doi-asserted-by":"crossref","unstructured":"Luo, J., et al.: Semantic-conditional diffusion networks for image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23359\u201323368 (2023)","DOI":"10.1109\/CVPR52729.2023.02237"},{"key":"26_CR32","doi-asserted-by":"crossref","unstructured":"Mathews, A., Xie, L., He, X.: Senticap: generating image descriptions with sentiments. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a030 (2016)","DOI":"10.1609\/aaai.v30i1.10475"},{"key":"26_CR33","doi-asserted-by":"crossref","unstructured":"Mathews, A., Xie, L., He, X.: Semstyle: learning to generate stylised image captions using unaligned text. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 8591\u20138600 (2018)","DOI":"10.1109\/CVPR.2018.00896"},{"key":"26_CR34","unstructured":"M\u00fcllner, D.: Modern hierarchical, agglomerative clustering algorithms. arXiv preprint arXiv:1109.2378 (2011)"},{"key":"26_CR35","doi-asserted-by":"crossref","unstructured":"Pennington, J., Socher, R., Manning, C.D.: Glove: global vectors for word representation. In: Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP), pp. 1532\u20131543 (2014)","DOI":"10.3115\/v1\/D14-1162"},{"key":"26_CR36","doi-asserted-by":"crossref","unstructured":"Plummer, B.A., Wang, L., Cervantes, C.M., Caicedo, J.C., Hockenmaier, J., Lazebnik, S.: Flickr30k entities: collecting region-to-phrase correspondences for richer image-to-sentence models. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2641\u20132649 (2015)","DOI":"10.1109\/ICCV.2015.303"},{"key":"26_CR37","doi-asserted-by":"crossref","unstructured":"Ramos, R., Martins, B., Elliott, D., Kementchedjhieva, Y.: Smallcap: lightweight image captioning prompted with retrieval augmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2840\u20132849 (2023)","DOI":"10.1109\/CVPR52729.2023.00278"},{"key":"26_CR38","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, vol. 28 (2015)"},{"key":"26_CR39","doi-asserted-by":"crossref","unstructured":"Ren, Y., et al.: Crossing the gap: domain generalization for image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2871\u20132880 (2023)","DOI":"10.1109\/CVPR52729.2023.00281"},{"key":"26_CR40","doi-asserted-by":"crossref","unstructured":"Rennie, S.J., Marcheret, E., Mroueh, Y., Ross, J., Goel, V.: Self-critical sequence training for image captioning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7008\u20137024 (2017)","DOI":"10.1109\/CVPR.2017.131"},{"key":"26_CR41","doi-asserted-by":"crossref","unstructured":"Shuster, K., Humeau, S., Hu, H., Bordes, A., Weston, J.: Engaging image captioning via personality. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12516\u201312526 (2019)","DOI":"10.1109\/CVPR.2019.01280"},{"key":"26_CR42","doi-asserted-by":"crossref","unstructured":"Toutanova, K., Klein, D., Manning, C.D., Singer, Y.: Feature-rich part-of-speech tagging with a cyclic dependency network. In: Proceedings of the 2003 Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics, pp. 252\u2013259 (2003)","DOI":"10.3115\/1073445.1073478"},{"key":"26_CR43","doi-asserted-by":"crossref","unstructured":"Toutanvoa, K., Manning, C.D.: Enriching the knowledge sources used in a maximum entropy part-of-speech tagger. In: 2000 Joint SIGDAT Conference on Empirical Methods in Natural Language Processing and Very Large Corpora, pp. 63\u201370 (2000)","DOI":"10.3115\/1117794.1117802"},{"key":"26_CR44","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Lawrence\u00a0Zitnick, C., Parikh, D.: Cider: consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"26_CR45","doi-asserted-by":"crossref","unstructured":"Wang, N., Xie, J., Wu, J., Jia, M., Li, L.: Controllable image captioning via prompting. In: AAAI (2023)","DOI":"10.1609\/aaai.v37i2.25360"},{"key":"26_CR46","doi-asserted-by":"crossref","unstructured":"Wang, Q., Chan, A.B.: Describing like humans: on diversity in image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4195\u20134203 (2019)","DOI":"10.1109\/CVPR.2019.00432"},{"key":"26_CR47","unstructured":"Wang, T., et\u00a0al.: Caption anything: interactive image description with diverse multimodal controls. arXiv preprint arXiv:2305.02677 (2023)"},{"key":"26_CR48","doi-asserted-by":"crossref","unstructured":"Wang, Z., Xiao, J., Chen, L., Gao, F., Shao, J., Chen, L.: Learning combinatorial prompts for universal controllable image captioning. arXiv preprint arXiv:2303.06338 (2023)","DOI":"10.1007\/s11263-024-02179-4"},{"key":"26_CR49","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"786","DOI":"10.1007\/978-3-030-88480-2_63","volume-title":"Natural Language Processing and Chinese Computing","author":"Q Xia","year":"2021","unstructured":"Xia, Q., et al.: XGPT: cross-modal generative pre-training for image captioning. In: Wang, L., Feng, Y., Hong, Yu., He, R. (eds.) NLPCC 2021. LNCS (LNAI), vol. 13028, pp. 786\u2013797. Springer, Cham (2021). https:\/\/doi.org\/10.1007\/978-3-030-88480-2_63"},{"key":"26_CR50","unstructured":"Xu, K., et al.: Show, attend and tell: neural image caption generation with visual attention. In: International Conference on Machine Learning, pp. 2048\u20132057. PMLR (2015)"},{"key":"26_CR51","doi-asserted-by":"publisher","first-page":"67","DOI":"10.1162\/tacl_a_00166","volume":"2","author":"P Young","year":"2014","unstructured":"Young, P., Lai, A., Hodosh, M., Hockenmaier, J.: From image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions. Trans. Assoc. Comput. Linguist. 2, 67\u201378 (2014)","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"26_CR52","doi-asserted-by":"crossref","unstructured":"Zellers, R., Yatskar, M., Thomson, S., Choi, Y.: Neural motifs: scene graph parsing with global context. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5831\u20135840 (2018)","DOI":"10.1109\/CVPR.2018.00611"},{"key":"26_CR53","doi-asserted-by":"crossref","unstructured":"Zeng, Z., Zhang, H., Lu, R., Wang, D., Chen, B., Wang, Z.: Conzic: controllable zero-shot image captioning by sampling-based polishing. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23465\u201323476 (2023)","DOI":"10.1109\/CVPR52729.2023.02247"},{"key":"26_CR54","doi-asserted-by":"crossref","unstructured":"Zhao, W., Wu, X., Zhang, X.: Memcap: memorizing style knowledge for image captioning. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a034, pp. 12984\u201312992 (2020)","DOI":"10.1609\/aaai.v34i07.6998"},{"key":"26_CR55","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Wei, J., Lin, Z., Sun, Y., Zhang, M., Zhang, M.: Visual spatial description: controlled spatial-oriented image-to-text generation. arXiv preprint arXiv:2210.11109 (2022)","DOI":"10.18653\/v1\/2022.emnlp-main.93"},{"key":"26_CR56","doi-asserted-by":"crossref","unstructured":"Zheng, Y., Li, Y., Wang, S.: Intention oriented image captions with guiding objects. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8395\u20138404 (2019)","DOI":"10.1109\/CVPR.2019.00859"},{"key":"26_CR57","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/978-3-030-58568-6_13","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Y Zhong","year":"2020","unstructured":"Zhong, Y., Wang, L., Chen, J., Yu, D., Li, Y.: Comprehensive image captioning via scene graph decomposition. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12359, pp. 211\u2013229. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58568-6_13"},{"key":"26_CR58","doi-asserted-by":"crossref","unstructured":"Zhu, W., Bhat, S.: Gruen for evaluating linguistic quality of generated text. arXiv preprint arXiv:2010.02498 (2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.9"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72848-8_26","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T14:12:44Z","timestamp":1732803164000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72848-8_26"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,29]]},"ISBN":["9783031728471","9783031728488"],"references-count":58,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72848-8_26","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,29]]},"assertion":[{"value":"29 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}