{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,6]],"date-time":"2025-11-06T11:46:41Z","timestamp":1762429601956,"version":"3.40.3"},"publisher-location":"Cham","reference-count":63,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031727832"},{"type":"electronic","value":"9783031727849"}],"license":[{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T00:00:00Z","timestamp":1727654400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-3-031-72784-9_22","type":"book-chapter","created":{"date-parts":[[2024,9,29]],"date-time":"2024-09-29T07:01:50Z","timestamp":1727593310000},"page":"390-408","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":4,"title":["Uncertainty-Aware Sign Language Video Retrieval with\u00a0Probability Distribution Modeling"],"prefix":"10.1007","author":[{"given":"Xuan","family":"Wu","sequence":"first","affiliation":[]},{"given":"Hongxiang","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yuanjiang","family":"Luo","sequence":"additional","affiliation":[]},{"given":"Xuxin","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"Xianwei","family":"Zhuang","sequence":"additional","affiliation":[]},{"given":"Meng","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Keren","family":"Fu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,9,30]]},"reference":[{"key":"22_CR1","unstructured":"Albanie, S.,et\u00a0al.: Bbc-oxford british sign language dataset (2021). arXiv preprint arXiv:2111.03635"},{"key":"22_CR2","doi-asserted-by":"crossref","unstructured":"Camgoz, N.C., Hadfield, S., Koller, O., Ney, H., Bowden, R.: Neural sign language translation. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7784\u20137793 (2018)","DOI":"10.1109\/CVPR.2018.00812"},{"key":"22_CR3","doi-asserted-by":"crossref","unstructured":"Camgoz, N.C., Koller, O., Hadfield, S., Bowden, R.: Multi-channel transformers for multi-articulatory sign language translation. In: Computer Vision\u2013ECCV 2020 Workshops: Glasgow, UK, August 23\u201328, 2020, Proceedings, Part IV 16. pp. 301\u2013319. Springer (2020)","DOI":"10.1007\/978-3-030-66823-5_18"},{"key":"22_CR4","unstructured":"Camgoz, N.C., Koller, O., Hadfield, S., Bowden, R.: Sign language transformers: joint end-to-end sign language recognition and translation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10023\u201310033 (2020)"},{"key":"22_CR5","doi-asserted-by":"crossref","unstructured":"Cao, M., Chen, L., Shou, M.Z., Zhang, C., Zou, Y.: On pursuit of designing multi-modal transformer for video grounding (2021). arXiv preprint arXiv:2109.06085","DOI":"10.18653\/v1\/2021.emnlp-main.773"},{"key":"22_CR6","doi-asserted-by":"crossref","unstructured":"Cao, M., et al.: Iterative proposal refinement for weakly-supervised video grounding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6524\u20136534 (2023)","DOI":"10.1109\/CVPR52729.2023.00631"},{"key":"22_CR7","doi-asserted-by":"crossref","unstructured":"Cao, M., Yang, T., Weng, J., Zhang, C., Wang, J., Zou, Y.: Locvtp: video-text pre-training for temporal localization. In: European Conference on Computer Vision, pp. 38\u201356. Springer (2022)","DOI":"10.1007\/978-3-031-19809-0_3"},{"key":"22_CR8","doi-asserted-by":"publisher","first-page":"5203","DOI":"10.1109\/TIP.2022.3193752","volume":"31","author":"M Cao","year":"2022","unstructured":"Cao, M., Zhang, C., Chen, L., Shou, M.Z., Zou, Y.: Deep motion prior for weakly-supervised temporal action localization. IEEE Trans. Image Process. 31, 5203\u20135213 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"22_CR9","doi-asserted-by":"crossref","unstructured":"Chang, J., Lan, Z., Cheng, C., Wei, Y.: Data uncertainty learning in face recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5710\u20135719 (2020)","DOI":"10.1109\/CVPR42600.2020.00575"},{"key":"22_CR10","unstructured":"Chen, G., Yao, W., Song, X., Li, X., Rao, Y., Zhang, K.: Plot: Prompt learning with optimal transport for vision-language models (2022). arXiv preprint arXiv:2210.01253"},{"key":"22_CR11","doi-asserted-by":"crossref","unstructured":"Chen, Y., Wang, J., Lin, L., Qi, Z., Ma, J., Shan, Y.: Tagging before alignment: Integrating multi-modal tags for video-text retrieval (2023). arXiv preprint arXiv:2301.12644","DOI":"10.1609\/aaai.v37i1.25113"},{"key":"22_CR12","first-page":"17043","volume":"35","author":"Y Chen","year":"2022","unstructured":"Chen, Y., Zuo, R., Wei, F., Wu, Y., Liu, S., Mak, B.: Two-stream network for sign language recognition and translation. Adv. Neural. Inf. Process. Syst. 35, 17043\u201317056 (2022)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"22_CR13","doi-asserted-by":"crossref","unstructured":"Cheng, K.L., Yang, Z., Chen, Q., Tai, Y.W.: Fully convolutional networks for continuous sign language recognition. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXIV 16. pp. 697\u2013714. Springer (2020)","DOI":"10.1007\/978-3-030-58586-0_41"},{"key":"22_CR14","doi-asserted-by":"crossref","unstructured":"Cheng, Y., Wei, F., Bao, J., Chen, D., Zhang, W.: Cico: domain-aware sign language retrieval via cross-lingual contrastive learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19016\u201319026 (2023)","DOI":"10.1109\/CVPR52729.2023.01823"},{"key":"22_CR15","doi-asserted-by":"crossref","unstructured":"Chun, S., Oh, S.J., De\u00a0Rezende, R.S., Kalantidis, Y., Larlus, D.: Probabilistic embeddings for cross-modal retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8415\u20138424 (2021)","DOI":"10.1109\/CVPR46437.2021.00831"},{"key":"22_CR16","doi-asserted-by":"crossref","unstructured":"Cihan\u00a0Camgoz, N., Hadfield, S., Koller, O., Bowden, R.: Subunets: end-to-end hand shape and continuous sign language recognition. In: Proceedings of the IEEE international conference on computer vision, pp. 3056\u20133065 (2017)","DOI":"10.1109\/ICCV.2017.332"},{"key":"22_CR17","doi-asserted-by":"crossref","unstructured":"Cui, R., Liu, H., Zhang, C.: Recurrent convolutional neural networks for continuous sign language recognition by staged optimization. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7361\u20137369 (2017)","DOI":"10.1109\/CVPR.2017.175"},{"key":"22_CR18","unstructured":"Cuturi, M.: Sinkhorn distances: lightspeed computation of optimal transport. Adv. Neural Inf. Proc. Syst. 26 (2013)"},{"key":"22_CR19","doi-asserted-by":"crossref","unstructured":"Duarte, A., Albanie, S., Gir\u00f3-i Nieto, X., Varol, G.: Sign language video retrieval with free-form textual queries. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14094\u201314104 (2022)","DOI":"10.1109\/CVPR52688.2022.01370"},{"key":"22_CR20","doi-asserted-by":"crossref","unstructured":"Duarte, A., et al.: How2sign: a large-scale multimodal dataset for continuous American sign language. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2735\u20132744 (2021)","DOI":"10.1109\/CVPR46437.2021.00276"},{"key":"22_CR21","doi-asserted-by":"crossref","unstructured":"Fang, B., et\u00a0al.: Uatvr: Uncertainty-adaptive text-video retrieval (2023). arXiv preprint arXiv:2301.06309","DOI":"10.1109\/ICCV51070.2023.01262"},{"key":"22_CR22","unstructured":"Fang, H., Xiong, P., Xu, L., Chen, Y.: Clip2video: Mastering video-text retrieval via image clip (2021). arXiv preprint arXiv:2106.11097"},{"key":"22_CR23","doi-asserted-by":"crossref","unstructured":"Gorti, S.K., et al.: X-pool: cross-modal language-video attention for text-video retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5006\u20135015 (2022)","DOI":"10.1109\/CVPR52688.2022.00495"},{"key":"22_CR24","doi-asserted-by":"crossref","unstructured":"Huang, J., Zhou, W., Zhang, Q., Li, H., Li, W.: Video-based sign language recognition without temporal segmentation. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a032 (2018)","DOI":"10.1609\/aaai.v32i1.11903"},{"key":"22_CR25","doi-asserted-by":"crossref","unstructured":"Jiang, S., Sun, B., Wang, L., Bai, Y., Li, K., Fu, Y.: Skeleton aware multi-modal sign language recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3413\u20133423 (2021)","DOI":"10.1109\/CVPRW53098.2021.00380"},{"key":"22_CR26","doi-asserted-by":"crossref","unstructured":"Jiao, P., Min, Y., Li, Y., Wang, X., Lei, L., Chen, X.: Cosign: exploring co-occurrence signals in skeleton-based continuous sign language recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 20676\u201320686 (2023)","DOI":"10.1109\/ICCV51070.2023.01890"},{"key":"22_CR27","doi-asserted-by":"crossref","unstructured":"Jin, P., et al.: Video-text as game players: Hierarchical banzhaf interaction for cross-modal representation learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2472\u20132482 (2023)","DOI":"10.1109\/CVPR52729.2023.00244"},{"key":"22_CR28","unstructured":"Kingma, D.P., Welling, M.: Auto-encoding variational bayes (2013). arXiv preprint arXiv:1312.6114"},{"key":"22_CR29","doi-asserted-by":"crossref","unstructured":"Koller, O., Zargaran, S., Ney, H.: Re-sign: re-aligned end-to-end sequence modelling with deep recurrent CNN-HMMs. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4297\u20134305 (2017)","DOI":"10.1109\/CVPR.2017.364"},{"key":"22_CR30","doi-asserted-by":"crossref","unstructured":"Lee, T., Oh, Y., Lee, K.M.: Human part-wise 3d motion context learning for sign language recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 20740\u201320750 (2023)","DOI":"10.1109\/ICCV51070.2023.01896"},{"key":"22_CR31","doi-asserted-by":"crossref","unstructured":"Li, D., Rodriguez, C., Yu, X., Li, H.: Word-level deep sign language recognition from video: a new large-scale dataset and methods comparison. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 1459\u20131469 (2020)","DOI":"10.1109\/WACV45572.2020.9093512"},{"key":"22_CR32","first-page":"12034","volume":"33","author":"D Li","year":"2020","unstructured":"Li, D., et al.: Tspnet: hierarchical feature learning via temporal semantic pyramid for sign language translation. Adv. Neural. Inf. Process. Syst. 33, 12034\u201312045 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"22_CR33","doi-asserted-by":"crossref","unstructured":", Li, D., Yu, X., Xu, C., Petersson, L., Li, H.: Transferring cross-domain knowledge for video sign language recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 6205\u20136214 (2020)","DOI":"10.1109\/CVPR42600.2020.00624"},{"key":"22_CR34","doi-asserted-by":"crossref","unstructured":"Li, H., Cao, M., Cheng, X., Li, Y., Zhu, Z., Zou, Y.: G2l: semantically aligned and uniform video grounding via geodesic and game theory. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 12032\u201312042 (2023)","DOI":"10.1109\/ICCV51070.2023.01105"},{"key":"22_CR35","doi-asserted-by":"crossref","unstructured":"Li, H., Cao, M., Cheng, X., Li, Y., Zhu, Z., Zou, Y.: Exploiting auxiliary caption for video grounding. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a038, pp. 18508\u201318516 (2024)","DOI":"10.1609\/aaai.v38i17.29812"},{"key":"22_CR36","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1016\/j.neucom.2022.07.028","volume":"508","author":"H Luo","year":"2022","unstructured":"Luo, H., et al.: Clip4clip: an empirical study of clip for end to end video clip retrieval and captioning. Neurocomputing 508, 293\u2013304 (2022)","journal-title":"Neurocomputing"},{"key":"22_CR37","doi-asserted-by":"crossref","unstructured":"Luo, Y., et al.: Textual inversion and self-supervised refinement for radiology report generation (2024)","DOI":"10.1007\/978-3-031-72086-4_64"},{"key":"22_CR38","doi-asserted-by":"crossref","unstructured":"Ma, Y., Xu, G., Sun, X., Yan, M., Zhang, J., Ji, R.: X-clip: end-to-end multi-grained contrastive learning for video-text retrieval. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 638\u2013647 (2022)","DOI":"10.1145\/3503161.3547910"},{"key":"22_CR39","doi-asserted-by":"crossref","unstructured":"Momeni, L., Varol, G., Albanie, S., Afouras, T., Zisserman, A.: Watch, read and lookup: learning to spot signs from multiple supervisors. In: Proceedings of the Asian Conference on Computer Vision (2020)","DOI":"10.1007\/978-3-030-69544-6_18"},{"key":"22_CR40","unstructured":"Oh, S.J., Murphy, K., Pan, J., Roth, J., Schroff, F., Gallagher, A.: Modeling uncertainty with hedged instance embedding (2018). arXiv preprint arXiv:1810.00319"},{"key":"22_CR41","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"key":"22_CR42","doi-asserted-by":"crossref","unstructured":"Shi, B., Brentari, D., Shakhnarovich, G., Livescu, K.: Open-domain sign language translation learned from online video (2022). arXiv preprint arXiv:2205.12870","DOI":"10.18653\/v1\/2022.emnlp-main.427"},{"key":"22_CR43","doi-asserted-by":"crossref","unstructured":"Shi, Y., Jain, A.K.: Probabilistic face embeddings. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 6902\u20136911 (2019)","DOI":"10.1109\/ICCV.2019.00700"},{"key":"22_CR44","doi-asserted-by":"crossref","unstructured":"Sun, J.J., Zhao, J., Chen, L.C., Schroff, F., Adam, H., Liu, T.: View-invariant probabilistic embedding for human pose. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part V 16. pp. 53\u201370. Springer (2020)","DOI":"10.1007\/978-3-030-58558-7_4"},{"key":"22_CR45","doi-asserted-by":"crossref","unstructured":"Varol, G., Momeni, L., Albanie, S., Afouras, T., Zisserman, A.: Read and attend: temporal localisation in sign language videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 16857\u201316866 (2021)","DOI":"10.1109\/CVPR46437.2021.01658"},{"key":"22_CR46","unstructured":"Vilnis, L., McCallum, A.: Word representations via gaussian embedding (2014). arXiv preprint arXiv:1412.6623"},{"key":"22_CR47","unstructured":"Wang, Q., Zhang, Y., Zheng, Y., Pan, P., Hua, X.S.: Disentangled representation learning for text-video retrieval (2022). arXiv preprint arXiv:2203.07111"},{"key":"22_CR48","doi-asserted-by":"crossref","unstructured":"Wang, Z., Sung, Y.L., Cheng, F., Bertasius, G., Bansal, M.: Unified coarse-to-fine alignment for video-text retrieval. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 2816\u20132827 (2023)","DOI":"10.1109\/ICCV51070.2023.00264"},{"key":"22_CR49","doi-asserted-by":"crossref","unstructured":"Wei, F., Chen, Y.: Improving continuous sign language recognition with cross-lingual signs. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 23612\u201323621 (2023)","DOI":"10.1109\/ICCV51070.2023.02158"},{"key":"22_CR50","doi-asserted-by":"crossref","unstructured":"Wu, W., Luo, H., Fang, B., Wang, J., Ouyang, W.: Cap4video: what can auxiliary captions do for text-video retrieval? In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10704\u201310713 (2023)","DOI":"10.1109\/CVPR52729.2023.01031"},{"key":"22_CR51","doi-asserted-by":"crossref","unstructured":"Xu, Z., Chen, Z., Zhang, Y., Song, Y., Wan, X., Li, G.: Bridging vision and language encoders: parameter-efficient tuning for referring image segmentation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 17503\u201317512 (2023)","DOI":"10.1109\/ICCV51070.2023.01605"},{"key":"22_CR52","unstructured":"Xu, Z., et al.: Enhancing fine-grained multi-modal alignment via adapters: a parameter-efficient training framework for referring image segmentation. In: 2nd Workshop on Advancing Neural Network Training: Computational Efficiency, Scalability, and Resource Optimization (WANT@ ICML 2024)"},{"key":"22_CR53","unstructured":"Xue, H., etal.: Clip-vip: Adapting pre-trained image-text model to video-language representation alignment (2022). arXiv preprint arXiv:2209.06430"},{"key":"22_CR54","doi-asserted-by":"crossref","unstructured":"Yao, H., Zhou, W., Feng, H., Hu, H., Zhou, H., Li, H.: Sign language translation with iterative prototype. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 15592\u201315601 (2023)","DOI":"10.1109\/ICCV51070.2023.01429"},{"key":"22_CR55","unstructured":"Yao, L., et al.: Filip: Fine-grained interactive language-image pre-training (2021). arXiv preprint arXiv:2111.07783"},{"key":"22_CR56","doi-asserted-by":"crossref","unstructured":"Yu, T., Li, D., Yang, Y., Hospedales, T.M., Xiang, T.: Robust person re-identification by modelling feature uncertainty. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 552\u2013561 (2019)","DOI":"10.1109\/ICCV.2019.00064"},{"key":"22_CR57","doi-asserted-by":"crossref","unstructured":"Zhang, H., Guo, Z., Yang, Y., Liu, X., Hu, D.: C2st: cross-modal contextualized sequence transduction for continuous sign language recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 21053\u201321062 (2023)","DOI":"10.1109\/ICCV51070.2023.01925"},{"key":"22_CR58","doi-asserted-by":"crossref","unstructured":"Zhao, S., Zhu, L., Wang, X., Yang, Y.: Centerclip: token clustering for efficient text-video retrieval. In: Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 970\u2013981 (2022)","DOI":"10.1145\/3477495.3531950"},{"key":"22_CR59","doi-asserted-by":"crossref","unstructured":"Zheng, J., et al.: Cvt-slr: contrastive visual-textual transformation for sign language recognition with variational alignment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 23141\u201323150 (2023)","DOI":"10.1109\/CVPR52729.2023.02216"},{"key":"22_CR60","doi-asserted-by":"crossref","unstructured":"Zhou, B., et al.: Gloss-free sign language translation: improving from visual-language pretraining. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 20871\u201320881 (2023)","DOI":"10.1109\/ICCV51070.2023.01908"},{"key":"22_CR61","doi-asserted-by":"crossref","unstructured":"Zhou, H., Zhou, W., Qi, W., Pu, J., Li, H.: Improving sign language translation with monolingual data by sign back-translation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1316\u20131325 (2021)","DOI":"10.1109\/CVPR46437.2021.00137"},{"key":"22_CR62","doi-asserted-by":"crossref","unstructured":"Zuo, R., Mak, B.: C2slr: Consistency-enhanced continuous sign language recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 5131\u20135140 (2022)","DOI":"10.1109\/CVPR52688.2022.00507"},{"key":"22_CR63","doi-asserted-by":"crossref","unstructured":"Zuo, R., Wei, F., Mak, B.: Natural language-assisted sign language recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 14890\u201314900 (2023)","DOI":"10.1109\/CVPR52729.2023.01430"}],"container-title":["Lecture Notes in Computer Science","Computer Vision \u2013 ECCV 2024"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-72784-9_22","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,28]],"date-time":"2024-11-28T21:22:09Z","timestamp":1732828929000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-72784-9_22"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,9,30]]},"ISBN":["9783031727832","9783031727849"],"references-count":63,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-72784-9_22","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,9,30]]},"assertion":[{"value":"30 September 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ECCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"European Conference on Computer Vision","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Milan","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Italy","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"eccv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/eccv2024.ecva.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}