{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T18:03:43Z","timestamp":1779818623721,"version":"3.53.1"},"publisher-location":"Singapore","reference-count":20,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819620739","type":"print"},{"value":"9789819620746","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,1,1]],"date-time":"2025-01-01T00:00:00Z","timestamp":1735689600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-2074-6_35","type":"book-chapter","created":{"date-parts":[[2024,12,31]],"date-time":"2024-12-31T11:05:49Z","timestamp":1735643149000},"page":"294-301","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["IMSearch 2.0: Toward User-Centric and Efficient Interactive Multimedia Retrieval System"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3893-8582","authenticated-orcid":false,"given":"Duc-Tuan","family":"Luu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7096-111X","authenticated-orcid":false,"given":"Khanh-An C.","family":"Quan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Duy-Ngoc","family":"Nguyen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Khanh-Linh","family":"Bui-Le","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Nhat-Sang","family":"Doan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Minh-Duc","family":"Le-Ngo","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4260-7874","authenticated-orcid":false,"given":"Vinh-Tiep","family":"Nguyen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-3046-3041","authenticated-orcid":false,"given":"Minh-Triet","family":"Tran","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,1,1]]},"reference":[{"key":"35_CR1","doi-asserted-by":"crossref","unstructured":"Amato, G., et al.: VISIONE at video browser showdown 2023. In: International conference on multimedia modeling, pp. 615\u2013621. Springer (2023)","DOI":"10.1007\/978-3-031-27077-2_48"},{"key":"35_CR2","doi-asserted-by":"crossref","unstructured":"Asan, U., Ercan, S.: An introduction to self-organizing maps. In: Computational intelligence systems in industrial engineering: with recent theory and applications, pp. 295\u2013315. Springer (2012)","DOI":"10.2991\/978-94-91216-77-0_14"},{"key":"35_CR3","unstructured":"Baevski, A., Zhou, Y., Mohamed, A., Auli, M.: wav2vec 2.0: a framework for self-supervised learning of speech representations. Adv. Neural Inf. Process. Syst. 33, 12449\u201312460 (2020)"},{"key":"35_CR4","doi-asserted-by":"crossref","unstructured":"Bautista, D., Atienza, R.: Scene text recognition with permuted autoregressive sequence models. In: ECCV, pp. 178\u2013196 (2022)","DOI":"10.1007\/978-3-031-19815-1_11"},{"key":"35_CR5","doi-asserted-by":"crossref","unstructured":"Do, T.L., et\u00a0al.: News event retrieval from large video collection in Ho Chi Minh City AI challenge 2023. In: Proceedings of the 12th International Symposium on Information and Communication Technology, pp. 1011\u20131017 (2023)","DOI":"10.1145\/3628797.3628940"},{"key":"35_CR6","doi-asserted-by":"crossref","unstructured":"Hezel, N., Schall, K., Jung, K., Barthel, K.U.: Efficient search and browsing of large-scale video collections with vibro. In: International Conference on Multimedia Modeling, pp. 487\u2013492. Springer (2022)","DOI":"10.1007\/978-3-030-98355-0_43"},{"key":"35_CR7","unstructured":"Jia, C., et al.: Scaling up visual and vision-language representation learning with noisy text supervision. In: International Conference on Machine Learning, pp. 4904\u20134916. PMLR (2021)"},{"issue":"3","key":"35_CR8","doi-asserted-by":"publisher","first-page":"535","DOI":"10.1109\/TBDATA.2019.2921572","volume":"7","author":"J Johnson","year":"2019","unstructured":"Johnson, J., Douze, M., J\u00e9gou, H.: Billion-scale similarity search with GPUs. IEEE Trans. Big Data 7(3), 535\u2013547 (2019)","journal-title":"IEEE Trans. Big Data"},{"key":"35_CR9","unstructured":"Koukounas, A., et\u00a0al.: Jina clip: Your clip model is also your text retriever. arXiv preprint arXiv:2405.20204 (2024)"},{"key":"35_CR10","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: Blip: bootstrapping language-image pre-training for unified vision-language understanding and generation. In: ICML, pp. 12888\u201312900 (2022)"},{"key":"35_CR11","doi-asserted-by":"crossref","unstructured":"Luu, D.T., et al.: Cdc: Color-based diffusion model with caption embedding in vbs 2022, p. 575\u2013579 (2022)","DOI":"10.1007\/978-3-030-98355-0_57"},{"key":"35_CR12","doi-asserted-by":"crossref","unstructured":"Nguyen, T.N., et al.: VideoCLIP: an interactive CLIP-based video retrieval system at VBS2023. In: International Conference on Multimedia Modeling, pp. 671\u2013677. Springer (2023)","DOI":"10.1007\/978-3-031-27077-2_57"},{"key":"35_CR13","doi-asserted-by":"crossref","unstructured":"Nguyen, T.N., Quang, L.M., Healy, G., Nguyen, B.T., Gurrin, C.: VideoCLIP 2.0: an interactive clip-based video retrieval system for novice users at vbs2024. In: International Conference on Multimedia Modeling, pp. 394\u2013399. Springer (2024)","DOI":"10.1007\/978-3-031-53302-0_37"},{"key":"35_CR14","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763. PMLR (2021)"},{"issue":"4","key":"35_CR15","doi-asserted-by":"publisher","first-page":"288","DOI":"10.1002\/(SICI)1097-4571(199006)41:4<288::AID-ASI8>3.0.CO;2-H","volume":"41","author":"G Salton","year":"1990","unstructured":"Salton, G., Buckley, C.: Improving retrieval performance by relevance feedback. J. Am. Soc. Inf. Sci. 41(4), 288\u2013297 (1990)","journal-title":"J. Am. Soc. Inf. Sci."},{"key":"35_CR16","doi-asserted-by":"crossref","unstructured":"Vadicamo, L., et\u00a0al.: Evaluating performance and trends in interactive video retrieval: Insights from the 12th VBS competition. IEEE Access (2024)","DOI":"10.1109\/ACCESS.2024.3405638"},{"issue":"3","key":"35_CR17","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1037\/0278-7393.28.3.509","volume":"28","author":"FA Wichmann","year":"2002","unstructured":"Wichmann, F.A., Sharpe, L.T., Gegenfurtner, K.R.: The contributions of color to recognition memory for natural scenes. J. Exp. Psychol. Learn. Mem. Cogn. 28(3), 509 (2002)","journal-title":"J. Exp. Psychol. Learn. Mem. Cogn."},{"key":"35_CR18","doi-asserted-by":"crossref","unstructured":"Wu, J.: Advances in K-means clustering: a data mining thinking. Springer Science & Business Media (2012)","DOI":"10.1007\/978-3-642-29807-3"},{"key":"35_CR19","doi-asserted-by":"crossref","unstructured":"Ye, M., et al.: DeepSolo: let transformer decoder with explicit points solo for text spotting. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19348\u201319357 (2023)","DOI":"10.1109\/CVPR52729.2023.01854"},{"key":"35_CR20","unstructured":"Yuan, K., et al.: Learning multi-modal representations by watching hundreds of surgical video lectures. arXiv preprint arXiv:2307.15220 (2023)"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-2074-6_35","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,5,26]],"date-time":"2026-05-26T17:21:44Z","timestamp":1779816104000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-2074-6_35"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025]]},"ISBN":["9789819620739","9789819620746"],"references-count":20,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-2074-6_35","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025]]},"assertion":[{"value":"1 January 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nara","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 January 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 January 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2025.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}