{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T17:24:37Z","timestamp":1771953877864,"version":"3.50.1"},"publisher-location":"Cham","reference-count":43,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031915840","type":"print"},{"value":"9783031915857","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-91585-7_23","type":"book-chapter","created":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T08:29:55Z","timestamp":1748248195000},"page":"382-398","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Hyperbolic Learning with\u00a0Multimodal Large Language Models"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4493-2497","authenticated-orcid":false,"given":"Paolo","family":"Mandica","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0107-6755","authenticated-orcid":false,"given":"Luca","family":"Franco","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Konstantinos","family":"Kallidromitis","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Suzanne","family":"Petryk","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1875-7813","authenticated-orcid":false,"given":"Fabio","family":"Galasso","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,5,12]]},"reference":[{"key":"23_CR1","first-page":"23716","volume":"35","author":"JB Alayrac","year":"2022","unstructured":"Alayrac, J.B., et al.: Flamingo: a visual language model for few-shot learning. Adv. Neural. Inf. Process. Syst. 35, 23716\u201323736 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"23_CR2","doi-asserted-by":"crossref","unstructured":"Anderson, P., Fernando, B., Johnson, M., Gould, S.: SPICE: semantic propositional image caption evaluation. In: Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, 11\u201314 October 2016, Proceedings, Part V 14, pp. 382\u2013398. Springer (2016)","DOI":"10.1007\/978-3-319-46454-1_24"},{"key":"23_CR3","doi-asserted-by":"crossref","unstructured":"Atigh, M.G., Schoep, J., Acar, E., Van\u00a0Noord, N., Mettes, P.: Hyperbolic image segmentation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4453\u20134462 (2022)","DOI":"10.1109\/CVPR52688.2022.00441"},{"key":"23_CR4","unstructured":"Banerjee, S., Lavie, A.: METEOR: an automatic metric for MT evaluation with improved correlation with human judgments. In: Goldstein, J., Lavie, A., Lin, C.Y., Voss, C. (eds.) Proceedings of the ACL Workshop on Intrinsic and Extrinsic Evaluation Measures for Machine Translation and\/or Summarization, pp. 65\u201372. Association for Computational Linguistics, Ann Arbor (2005). https:\/\/aclanthology.org\/W05-0909"},{"key":"23_CR5","unstructured":"Bdeir, A., Schwethelm, K., Landwehr, N.: Fully hyperbolic convolutional neural networks for computer vision. In: The Twelfth International Conference on Learning Representations (2024). https:\/\/openreview.net\/forum?id=ekz1hN5QNh"},{"key":"23_CR6","unstructured":"Chami, I., Ying, Z., R\u00e9, C., Leskovec, J.: Hyperbolic graph convolutional neural networks. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"23_CR7","doi-asserted-by":"crossref","unstructured":"Chen, B., Peng, W., Cao, X., R\u00f6ning, J.: Hyperbolic uncertainty aware semantic segmentation. IEEE Trans. Intell. Transp. Syst. (2023)","DOI":"10.1109\/TITS.2023.3312290"},{"key":"23_CR8","unstructured":"Chen, T., Kornblith, S., Norouzi, M., Hinton, G.: A simple framework for contrastive learning of visual representations. In: Proceedings of the 37th International Conference on Machine Learning, ICML 2020. JMLR.org (2020)"},{"key":"23_CR9","doi-asserted-by":"crossref","unstructured":"Chen, X., He, K.: Exploring simple Siamese representation learning. In: 2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 15745\u201315753 (2020). https:\/\/api.semanticscholar.org\/CorpusID:227118869","DOI":"10.1109\/CVPR46437.2021.01549"},{"key":"23_CR10","unstructured":"Dai, W., et al.: InstructBLIP: towards general-purpose vision-language models with instruction tuning (2023)"},{"key":"23_CR11","unstructured":"Desai, K., Nickel, M., Rajpurohit, T., Johnson, J., Vedantam, S.R.: Hyperbolic image-text representations. In: International Conference on Machine Learning, pp. 7694\u20137731. PMLR (2023)"},{"key":"23_CR12","doi-asserted-by":"crossref","unstructured":"Dhingra, B., Shallue, C.J., Norouzi, M., Dai, A.M., Dahl, G.E.: Embedding text in hyperbolic spaces. In: TextGraphs@NAACL-HLT (2018)","DOI":"10.18653\/v1\/W18-1708"},{"key":"23_CR13","doi-asserted-by":"crossref","unstructured":"Ermolov, A., Mirvakhabova, L., Khrulkov, V., Sebe, N., Oseledets, I.: Hyperbolic vision transformers: combining improvements in metric learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 7409\u20137419 (2022)","DOI":"10.1109\/CVPR52688.2022.00726"},{"key":"23_CR14","doi-asserted-by":"publisher","unstructured":"Flaborea, A., et al.: Are we certain it\u2019s anomalous? In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW), pp. 2897\u20132907. IEEE Computer Society, Los Alamitos (2023). https:\/\/doi.org\/10.1109\/CVPRW59228.2023.00291","DOI":"10.1109\/CVPRW59228.2023.00291"},{"key":"23_CR15","unstructured":"Franco, L., Mandica, P., Kallidromitis, K., Guillory, D., Li, Y.T., Galasso, F.: Hyperbolic active learning for semantic segmentation under domain shift. arXiv preprint arXiv:2306.11180 (2023)"},{"key":"23_CR16","unstructured":"Franco, L., Mandica, P., Munjal, B., Galasso, F.: Hyperbolic self-paced learning for self-supervised skeleton-based action representations. arXiv preprint arXiv:2303.06242 (2023)"},{"key":"23_CR17","unstructured":"Ganea, O., B\u00e9cigneul, G., Hofmann, T.: Hyperbolic neural networks. Adv. Neural Inf. Process. Syst. 31 (2018)"},{"key":"23_CR18","doi-asserted-by":"crossref","unstructured":"Ge, S., Mishra, S.K., Kornblith, S., Li, C.L., Jacobs, D.: Hyperbolic contrastive learning for visual representations beyond objects. In: 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022)","DOI":"10.1109\/CVPR52729.2023.00661"},{"key":"23_CR19","unstructured":"Grill, J.B., et al.: Bootstrap your own latent a new approach to self-supervised learning. In: Proceedings of the 34th International Conference on Neural Information Processing Systems, NIPS 2020 (2020)"},{"key":"23_CR20","unstructured":"Gulcehre, C., et\u00a0al.: Hyperbolic attention networks. arXiv preprint arXiv:1805.09786 (2018)"},{"key":"23_CR21","doi-asserted-by":"crossref","unstructured":"Guo, Y., Wang, X., Chen, Y., Yu, S.X.: Clipped hyperbolic classifiers are super-hyperbolic classifiers. In: 2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 1\u201310 (2021). https:\/\/api.semanticscholar.org\/CorpusID:244731271","DOI":"10.1109\/CVPR52688.2022.00010"},{"key":"23_CR22","unstructured":"Ibrahimi, S., Atigh, M.G., Van\u00a0Noord, N., Mettes, P., Worring, M.: Intriguing properties of hyperbolic embeddings in vision-language models. Trans. Mach. Learn. Res. (2024)"},{"key":"23_CR23","doi-asserted-by":"crossref","unstructured":"Khrulkov, V., Mirvakhabova, L., Ustinova, E., Oseledets, I., Lempitsky, V.: Hyperbolic image embeddings. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6418\u20136428 (2020)","DOI":"10.1109\/CVPR42600.2020.00645"},{"key":"23_CR24","doi-asserted-by":"crossref","unstructured":"Lai, X., et al.: LISA: reasoning segmentation via large language model. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9579\u20139589 (2024)","DOI":"10.1109\/CVPR52733.2024.00915"},{"key":"23_CR25","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: International Conference on Machine Learning, pp. 19730\u201319742. PMLR (2023)"},{"key":"23_CR26","unstructured":"Lin, C.Y.: ROUGE: a package for automatic evaluation of summaries. In: Text Summarization Branches Out, pp. 74\u201381. Association for Computational Linguistics, Barcelona (2004). https:\/\/aclanthology.org\/W04-1013"},{"key":"23_CR27","unstructured":"Lin, T., et al.: Microsoft COCO: common objects in context. CoRR abs\/1405.0312 (2014). http:\/\/arxiv.org\/abs\/1405.0312"},{"key":"23_CR28","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"23_CR29","unstructured":"Liu, Q., Nickel, M., Kiela, D.: Hyperbolic graph neural networks. Adv. Neural Inf. Process. Syst. 32 (2019)"},{"key":"23_CR30","doi-asserted-by":"crossref","unstructured":"Mettes, P., Ghadimi\u00a0Atigh, M., Keller-Ressel, M., Gu, J., Yeung, S.: Hyperbolic deep learning in computer vision: A survey. Int. J. Comput. Vision 1\u201325 (2024)","DOI":"10.1007\/s11263-024-02043-5"},{"key":"23_CR31","unstructured":"Nickel, M., Kiela, D.: Poincar\u00e9 embeddings for learning hierarchical representations (2017)"},{"key":"23_CR32","doi-asserted-by":"publisher","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.J.: BLEU: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting on Association for Computational Linguistics, ACL 2002, pp. 311\u2013318. Association for Computational Linguistics, USA (2002). https:\/\/doi.org\/10.3115\/1073083.1073135","DOI":"10.3115\/1073083.1073135"},{"key":"23_CR33","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"23_CR34","unstructured":"Shimizu, R., Mukuta, Y., Harada, T.: Hyperbolic neural networks++. arXiv preprint arXiv:2006.08210 (2020)"},{"key":"23_CR35","doi-asserted-by":"crossref","unstructured":"van Spengler, M., Berkhout, E., Mettes, P.: Poincar\u00e9 ResNet. In: 2023 IEEE\/CVF International Conference on Computer Vision (ICCV), pp. 5396\u20135405 (2023). https:\/\/api.semanticscholar.org\/CorpusID:257757355","DOI":"10.1109\/ICCV51070.2023.00499"},{"key":"23_CR36","doi-asserted-by":"crossref","unstructured":"Sur\u00eds, D., Liu, R., Vondrick, C.: Learning the predictability of the future. In: Proceedings of the Conference on Computer Vision and Pattern Recognition, pp. 12607\u201312617 (2021)","DOI":"10.1109\/CVPR46437.2021.01242"},{"key":"23_CR37","unstructured":"Tifrea, A., B\u00e9cigneul, G., Ganea, O.E.: Poincar\u00e9 GloVe: hyperbolic word embeddings. arXiv preprint arXiv:1810.06546 (2018)"},{"key":"23_CR38","doi-asserted-by":"crossref","unstructured":"Vedantam, R., Lawrence\u00a0Zitnick, C., Parikh, D.: CIDEr: consensus-based image description evaluation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4566\u20134575 (2015)","DOI":"10.1109\/CVPR.2015.7299087"},{"key":"23_CR39","doi-asserted-by":"crossref","unstructured":"Yan, J., Luo, L., Deng, C., Huang, H.: Unsupervised hyperbolic metric learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12465\u201312474 (2021)","DOI":"10.1109\/CVPR46437.2021.01228"},{"key":"23_CR40","unstructured":"Ye, Q., et\u00a0al.: mPLUG-Owl: modularization empowers large language models with multimodality. arXiv preprint arXiv:2304.14178 (2023)"},{"key":"23_CR41","unstructured":"Yin, S., et al.: A survey on multimodal large language models. arXiv preprint arXiv:2306.13549 (2023)"},{"key":"23_CR42","unstructured":"Zhang, S., et\u00a0al.: OPT: open pre-trained transformer language models. arXiv preprint arXiv:2205.01068 (2022)"},{"key":"23_CR43","unstructured":"Zhu, D., Chen, J., Shen, X., Li, X., Elhoseiny, M.: MiniGPT-4: enhancing vision-language understanding with advanced large language models. arXiv preprint arXiv:2304.10592 (2023)"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024 Workshops"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-91585-7_23","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,5,26]],"date-time":"2025-05-26T08:30:19Z","timestamp":1748248219000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-91585-7_23"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9783031915840","9783031915857"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-91585-7_23","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"12 May 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}