{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,1]],"date-time":"2025-11-01T09:36:23Z","timestamp":1761989783869,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":27,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819755936"},{"type":"electronic","value":"9789819755943"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-981-97-5594-3_25","type":"book-chapter","created":{"date-parts":[[2024,8,13]],"date-time":"2024-08-13T15:06:26Z","timestamp":1723561586000},"page":"298-310","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Fine-Grained Cross-Modal Contrast Learning for Video-Text Retrieval"],"prefix":"10.1007","author":[{"given":"Hui","family":"Liu","sequence":"first","affiliation":[]},{"given":"Gang","family":"Lv","sequence":"additional","affiliation":[]},{"given":"Yanhong","family":"Gu","sequence":"additional","affiliation":[]},{"given":"Fudong","family":"Nian","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,14]]},"reference":[{"key":"25_CR1","doi-asserted-by":"crossref","unstructured":"Nian, F., et al.: Multi-level cross-modal semantic alignment network for video\u2013text retrieval. Mathematics 10(18), 3346 (2022)","DOI":"10.3390\/math10183346"},{"key":"25_CR2","doi-asserted-by":"crossref","unstructured":"Liu, S., et al.: Hit: hierarchical transformer with momentum contrast for video-text re-trieval. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.01170"},{"key":"25_CR3","unstructured":"Fang, H., et al.: Clip2video: mastering video-text retrieval via image clip. arxiv preprint arxiv:2106.11097 (2021)"},{"key":"25_CR4","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervi-sion. In: International Conference on Machine Learning. PMLR (2021)"},{"key":"25_CR5","doi-asserted-by":"crossref","unstructured":"Portillo-Quintero, J.A., Ortiz-Bayliss, J.C., Terashima-Mar\u00edn, H.: A straightforward framework for video retrieval using clip. In: Mexican Conference on Pattern Recognition, pp. 3\u201312. Springer, Cham (2021)","DOI":"10.1007\/978-3-030-77004-4_1"},{"key":"25_CR6","unstructured":"Patrick, M., et al.: Support-set bottlenecks for video-text representation learning. arxiv preprint arxiv:2010.02824 (2020)"},{"key":"25_CR7","doi-asserted-by":"publisher","first-page":"293","DOI":"10.1016\/j.neucom.2022.07.028","volume":"508","author":"H Luo","year":"2022","unstructured":"Luo, H., et al.: Clip4clip: an empirical study of clip for end to end video clip retrieval and captioning. Neurocomputing 508, 293\u2013304 (2022)","journal-title":"Neurocomputing"},{"key":"25_CR8","doi-asserted-by":"crossref","unstructured":"Ma, Y., et al.: X-clip: end-to-end multi-grained contrastive learning for video-text re-trieval. In: Proceedings of the 30th ACM International Conference on Multimedia (2022)","DOI":"10.1145\/3503161.3547910"},{"key":"25_CR9","doi-asserted-by":"crossref","unstructured":"Gorti, S.K., et al.: X-pool: cross-modal language-video attention for text-video re-trieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2022)","DOI":"10.1109\/CVPR52688.2022.00495"},{"key":"25_CR10","unstructured":"Yao, L., et al.: Filip: fine-grained interactive language-image pre-training. arXiv pre-print arXiv:2111.07783 (2021)"},{"key":"25_CR11","doi-asserted-by":"crossref","unstructured":"Lee, K.-H., et al.: Stacked cross attention for image-text matching. In: Proceedings of the European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01225-0_13"},{"key":"25_CR12","unstructured":"Kay, W., Carreira, J., Simonyan, K., et al.: The kinetics human action video dataset. arxiv pre-print arxiv:1705.06950 (2017)"},{"key":"25_CR13","unstructured":"Chen, D., Dolan, W.B.: Collecting highly parallel data for paraphrase evaluation. In: Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies (2011)"},{"key":"25_CR14","doi-asserted-by":"crossref","unstructured":"Zhu, L., Yi, Y.: Actbert: learning global-local video-text representations. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00877"},{"key":"25_CR15","doi-asserted-by":"crossref","unstructured":"Bain, M., et al.: Frozen in time: a joint video and image encoder for end-to-end retrieval. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.00175"},{"key":"25_CR16","unstructured":"Wang, Z., et al.: Simvlm: simple visual language model pretraining with weak supervision. arxiv preprint arxiv:2108.10904 (2021)"},{"key":"25_CR17","doi-asserted-by":"crossref","unstructured":"Dzabraev, M., et al.: Mdmmt: multidomain multimodal transformer for video retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (2021)","DOI":"10.1109\/CVPRW53098.2021.00374"},{"key":"25_CR18","doi-asserted-by":"crossref","unstructured":"Yang, J., et al.: Membridge: video-language pre-training with memory-augmented in-ter-modality bridge. IEEE Trans. Image Process. (2023)","DOI":"10.1109\/TIP.2023.3283916"},{"key":"25_CR19","unstructured":"Jiang, J., et al.: Tencent text-video retrieval: hierarchical cross-modal interactions with mul-ti-level representations. IEEE Access (2022)"},{"key":"25_CR20","doi-asserted-by":"crossref","unstructured":"Wang, Z., et al.: Unified coarse-to-fine alignment for video-text retrieval. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2023)","DOI":"10.1109\/ICCV51070.2023.00264"},{"key":"25_CR21","doi-asserted-by":"crossref","unstructured":"Liu, Y., et al.: Ts2-net: token shift and selection transformer for text-video retrieval. In: European Conference on Computer Vision. Springer, Cham (2022)","DOI":"10.1007\/978-3-031-19781-9_19"},{"key":"25_CR22","unstructured":"Zhai, A., Wu, H.-Y.: Classification is a strong baseline for deep metric learning. arxiv preprint arxiv:1811.12649 (2018)"},{"key":"25_CR23","doi-asserted-by":"crossref","unstructured":"Xu, J., et al.: Msr-vtt: a large video description dataset for bridging video and language. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (2016)","DOI":"10.1109\/CVPR.2016.571"},{"key":"25_CR24","doi-asserted-by":"crossref","unstructured":"Yu, Y., Kim, J., Kim, G.: A joint sequence fusion model for video question answering and retrieval. In: Proceedings of the European Conference on Computer Vi-sion (ECCV) (2018)","DOI":"10.1007\/978-3-030-01234-2_29"},{"key":"25_CR25","unstructured":"Liu, Y., et al.: Use what you have: Video retrieval using representations from collabora-tive experts. arxiv preprint arxiv:1907.13487 (2019)"},{"key":"25_CR26","unstructured":"Gabeur, V., et al.: Multi-modal transformer for video retrieval. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part IV 16. Springer (2020)"},{"key":"25_CR27","doi-asserted-by":"crossref","unstructured":"Croitoru, I., et al.: Teachtext: crossmodal generalized distillation for text-video retrieval. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (2021)","DOI":"10.1109\/ICCV48922.2021.01138"}],"container-title":["Lecture Notes in Computer Science","Advanced Intelligent Computing Technology and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-5594-3_25","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,13]],"date-time":"2024-08-13T15:11:15Z","timestamp":1723561875000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-5594-3_25"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9789819755936","9789819755943"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-5594-3_25","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"14 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ICIC","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Intelligent Computing","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Tianjin","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"5 August 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8 August 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"icic2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/www.ic-icc.cn\/2024\/index.htm","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}