{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T20:04:23Z","timestamp":1769976263565,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":27,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819557219","type":"print"},{"value":"9789819557226","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5722-6_16","type":"book-chapter","created":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T08:14:11Z","timestamp":1769933651000},"page":"194-203","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Frame-Wise Multimodal Retrieval in\u00a0Video Corpus with\u00a0Contrastive Learning"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-4879-1388","authenticated-orcid":false,"given":"Bo","family":"Lu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0007-0499-956X","authenticated-orcid":false,"given":"Guiyuan","family":"Liang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0008-0905-1214","authenticated-orcid":false,"given":"Tianbao","family":"Zhao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0009-3066-1372","authenticated-orcid":false,"given":"Xiaoyuan","family":"Liang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0247-9866","authenticated-orcid":false,"given":"Ye","family":"Yuan","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,2]]},"reference":[{"key":"16_CR1","doi-asserted-by":"crossref","unstructured":"Gao, J., Sun, C., Yang, Z., Nevatia, R., \u201cTALL: temporal activity localization via language query,\u201d ICCV,: Venice, Italy, October 22\u201329, 2017. IEEE Computer Society 2017, 5277\u20135285 (2017)","DOI":"10.1109\/ICCV.2017.563"},{"key":"16_CR2","doi-asserted-by":"crossref","unstructured":"Miech, A., Alayrac, J., Smaira, L., Laptev, I., Sivic, J., Zisserman, A., \u201cEnd-to-end learning of visual representations from uncurated instructional videos,\u201d CVPR,: Seattle, WA, USA, June 13\u201319, 2020. Computer Vision Foundation \/ IEEE 2020, 9876\u20139886 (2020)","DOI":"10.1109\/CVPR42600.2020.00990"},{"issue":"4","key":"16_CR3","doi-asserted-by":"publisher","first-page":"41","DOI":"10.1007\/s11280-024-01249-4","volume":"27","author":"A Li","year":"2024","unstructured":"Li, A., Li, Y., Shao, Y.: Federated learning for supervised cross-modal retrieval[J]. World Wide Web (WWW) 27(4), 41 (2024)","journal-title":"World Wide Web (WWW)"},{"key":"16_CR4","doi-asserted-by":"crossref","unstructured":"M.\u00a0Hahn, A.\u00a0Kadav, J.\u00a0M. Rehg, and H.\u00a0P. Graf, \u201cTripping through time: Efficient localization of activities in videos,\u201d BMVC 2020, Virtual Event, UK, September 7-10, 2020. BMVA Press, 2020","DOI":"10.5244\/C.34.135"},{"key":"16_CR5","unstructured":"J.\u00a0Wu, G.\u00a0Li, S.\u00a0Liu, and L.\u00a0Lin, \u201cTree-structured policy based progressive reinforcement learning for temporally language grounding in video.\u201d AAAI Press, 2020, pp. 12\u00a0386\u201312\u00a0393"},{"key":"16_CR6","unstructured":"V.\u00a0Escorcia, M.\u00a0Soldan, J.\u00a0Sivic, B.\u00a0Ghanem, and B.\u00a0C. Russell, \u201cTemporal localization of moments in video collections with natural language,\u201d CoRR, vol. abs\/1907.12763, 2019"},{"key":"16_CR7","unstructured":"B.\u00a0Zhang, H.\u00a0Hu, J.\u00a0Lee, M.\u00a0Zhao, S.\u00a0Chammas, V.\u00a0Jain, E.\u00a0Ie, and F.\u00a0Sha, \u201cA hierarchical multi-modal encoder for moment localization in video corpus,\u201d ArXiv, vol. abs\/2011.09046, 2020. [Online]"},{"issue":"1","key":"16_CR8","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1007\/s11280-024-01239-6","volume":"27","author":"S Teng","year":"2024","unstructured":"Teng, S., Huang, W., Wu, N., et al.: Discrete cross-modal hashing with relaxation and label semantic guidance[J]. World Wide Web (WWW) 27(1), 4 (2024)","journal-title":"World Wide Web (WWW)"},{"key":"16_CR9","doi-asserted-by":"crossref","unstructured":"H.\u00a0Zhang, A.\u00a0Sun, W.\u00a0Jing, G.\u00a0Nan, L.\u00a0Zhen, J.\u00a0T. Zhou, and R.\u00a0S.\u00a0M. Goh, \u201cVideo corpus moment retrieval with contrastive learning,\u201d SIGIR Conference on Research and Development in Information Retrieval, Virtual Event, Canada, July 11-15, 2021. ACM, 2021, pp. 685\u2013695","DOI":"10.1145\/3404835.3462874"},{"key":"16_CR10","doi-asserted-by":"publisher","first-page":"1338","DOI":"10.1109\/TMM.2021.3063631","volume":"24","author":"H Tang","year":"2022","unstructured":"Tang, H., Zhu, J., Liu, M., Gao, Z., Cheng, Z.: Frame-wise cross-modal matching for video moment retrieval. IEEE Trans. Multim. 24, 1338\u20131349 (2022)","journal-title":"IEEE Trans. Multim."},{"key":"16_CR11","doi-asserted-by":"crossref","unstructured":"Opazo, C.R., Marrese-Taylor, E., Saleh, F.S., Li, H., Gould, S., \u201cProposal-free temporal moment localization of a natural-language query in video using guided attention,\u201d WACV,: Snowmass Village, CO, USA, March 1\u20135, 2020. IEEE 2020, 2453\u20132462 (2020)","DOI":"10.1109\/WACV45572.2020.9093328"},{"issue":"9","key":"16_CR12","doi-asserted-by":"publisher","first-page":"5281","DOI":"10.1109\/TCSVT.2023.3250518","volume":"33","author":"X Sun","year":"2023","unstructured":"Sun, X., Gao, J., Zhu, Y., Wang, X., Zhou, X.: Video moment retrieval via comprehensive relation-aware network. IEEE Trans. Circuits Syst. Video Technol. 33(9), 5281\u20135295 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"16_CR13","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"144","DOI":"10.1007\/978-3-642-33718-5_11","volume-title":"Computer Vision \u2013 ECCV 2012","author":"M Rohrbach","year":"2012","unstructured":"Rohrbach, M., Regneri, M., Andriluka, M., Amin, S., Pinkal, M., Schiele, B.: Script Data for Attribute-Based Recognition of Composite Activities. In: Fitzgibbon, A., Lazebnik, S., Perona, P., Sato, Y., Schmid, C. (eds.) ECCV 2012. LNCS, vol. 7572, pp. 144\u2013157. Springer, Heidelberg (2012). https:\/\/doi.org\/10.1007\/978-3-642-33718-5_11"},{"key":"16_CR14","doi-asserted-by":"crossref","unstructured":"Hendricks, L.A., Wang, O., Shechtman, E., Sivic, J., Darrell, T., Russell, B.C., \u201cLocalizing moments in video with natural language,\u201d ICCV,: Venice, Italy, October 22\u201329, 2017. IEEE Computer Society 2017, 5804\u20135813 (2017)","DOI":"10.1109\/ICCV.2017.618"},{"key":"16_CR15","doi-asserted-by":"crossref","unstructured":"Liu, M., Wang, X., Nie, L., He, X., Chen, B., Chua, T., \u201cAttentive moment retrieval in videos,\u201d SIGIR,: Ann Arbor, MI, USA, July 08\u201312, 2018. ACM 2018, 15\u201324 (2018)","DOI":"10.1145\/3209978.3210003"},{"key":"16_CR16","doi-asserted-by":"crossref","unstructured":"Wang, W., Huang, Y., Wang, L., \u201cLanguage-driven temporal activity localization: A semantic matching reinforcement learning model,\u201d CVPR,: Long Beach, CA, USA, June 16\u201320, 2019. Computer Vision Foundation \/ IEEE 2019, 334\u2013343 (2019)","DOI":"10.1109\/CVPR.2019.00042"},{"key":"16_CR17","doi-asserted-by":"crossref","unstructured":"Y.\u00a0Yuan, T.\u00a0Mei, and W.\u00a0Zhu, \u201cTo find where you talk: Temporal sentence localization in video with attention based location regression.\u201d Press, 2019, pp. 9159\u20139166","DOI":"10.1609\/aaai.v33i01.33019159"},{"key":"16_CR18","doi-asserted-by":"crossref","unstructured":"S.\u00a0Chen and Y.\u00a0Jiang, \u201cSemantic proposal for activity localization in videos via sentence query.\u201d AAAI Press, 2019, pp. 8199\u20138206","DOI":"10.1609\/aaai.v33i01.33018199"},{"key":"16_CR19","doi-asserted-by":"crossref","unstructured":"Ge, R., Gao, J., Chen, K., Nevatia, R., \u201cMAC: mining activity concepts for language-based temporal localization,\u201d WACV,: Waikoloa Village, HI, USA, January 7\u201311, 2019. IEEE 2019, 245\u2013253 (2019)","DOI":"10.1109\/WACV.2019.00032"},{"key":"16_CR20","unstructured":"J.\u00a0Wang, L.\u00a0Ma, and W.\u00a0Jiang, \u201cTemporally grounding language queries in videos by contextual boundary-aware prediction.\u201d AAAI Press, 2020, pp. 12\u00a0168\u201312\u00a0175"},{"key":"16_CR21","doi-asserted-by":"crossref","unstructured":"D.\u00a0He, X.\u00a0Zhao, J.\u00a0Huang, F.\u00a0Li, X.\u00a0Liu, and S.\u00a0Wen, \u201cRead, watch, and move: Reinforcement learning for temporally grounding natural language descriptions in videos.\u201d AAAI Press, 2019, pp. 8393\u20138400","DOI":"10.1609\/aaai.v33i01.33018393"},{"key":"16_CR22","doi-asserted-by":"crossref","unstructured":"Tan, R., Xu, H., Saenko, K., Plummer, B.A., \u201cLogan: Latent graph co-attention network for weakly-supervised video moment retrieval,\u201d WACV,: Waikoloa, HI, USA, January 3\u20138, 2021. IEEE 2021, 2082\u20132091 (2021)","DOI":"10.1109\/WACV48630.2021.00213"},{"key":"16_CR23","doi-asserted-by":"crossref","unstructured":"H.\u00a0Xu, K.\u00a0He, B.\u00a0A. Plummer, L.\u00a0Sigal, S.\u00a0Sclaroff, and K.\u00a0Saenko, \u201cMultilevel language and vision integration for text-to-clip retrieval.\u201d AAAI Press, 2019, pp. 9062\u20139069","DOI":"10.1609\/aaai.v33i01.33019062"},{"issue":"3","key":"16_CR24","doi-asserted-by":"publisher","first-page":"1646","DOI":"10.1109\/TCSVT.2021.3075470","volume":"32","author":"J Gao","year":"2022","unstructured":"Gao, J., Xu, C.: Learning video moment retrieval without a single annotated video. IEEE Trans. Circuits Syst. Video Technol. 32(3), 1646\u20131657 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"16_CR25","unstructured":"S.\u00a0Zhang, H.\u00a0Peng, J.\u00a0Fu, and J.\u00a0Luo, \u201cLearning 2d temporal adjacent networks for moment localization with natural language.\u201d AAAI Press, 2020, pp. 12\u00a0870\u201312\u00a0877"},{"issue":"3","key":"16_CR26","doi-asserted-by":"publisher","first-page":"2613","DOI":"10.1609\/aaai.v36i3.20163","volume":"36","author":"Z Wang","year":"2022","unstructured":"Wang, Z., Wang, L., Wu, T., Li, T., Wu, G.: Negative sample matters: A renaissance of metric learning for temporal grounding. Proceedings of the AAAI Conference on Artificial Intelligence 36(3), 2613\u20132623 (2022)","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"16_CR27","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"51","DOI":"10.1007\/978-3-030-26969-2_5","volume-title":"Intelligent Computing Theories and Application","author":"G Lejun","year":"2019","unstructured":"Lejun, G., Xiaolin, L., Xuemin, Y., Lipeng, Z., Yao, J., Ronggen, Y.: CBLNER: A Multi-models Biomedical Named Entity Recognition System Based on Machine Learning. In: Huang, D.-S., Jo, K.-H., Huang, Z.-K. (eds.) ICIC 2019. LNCS, vol. 11644, pp. 51\u201360. Springer, Cham (2019). https:\/\/doi.org\/10.1007\/978-3-030-26969-2_5"}],"container-title":["Lecture Notes in Computer Science","Web and Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5722-6_16","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,1]],"date-time":"2026-02-01T08:14:15Z","timestamp":1769933655000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5722-6_16"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819557219","9789819557226"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5722-6_16","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"2 February 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors declare that they have no conflictof interest.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"APWeb-WAIM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asia-Pacific Web (APWeb) and Web-Age Information Management (WAIM) Joint International Conference on Web and Big Data","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shenyang","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"apwebwaim2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/apweb2025.sau.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}