{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T20:37:12Z","timestamp":1770410232725,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":27,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819569625","type":"print"},{"value":"9789819569632","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-6963-2_17","type":"book-chapter","created":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T10:08:23Z","timestamp":1770372503000},"page":"167-175","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Fusionista2.0: Efficiency Retrieval System for\u00a0Large-Scale Datasets"],"prefix":"10.1007","author":[{"given":"Huy M.","family":"Le","sequence":"first","affiliation":[]},{"given":"Dat Tien","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Phuc Binh","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Gia Bao Le","family":"Tran","sequence":"additional","affiliation":[]},{"given":"Phu Truong","family":"Thien","sequence":"additional","affiliation":[]},{"given":"Cuong","family":"Dinh","sequence":"additional","affiliation":[]},{"given":"Minh","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Nga","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Thuy T. N.","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Huy Gia","family":"Ngo","sequence":"additional","affiliation":[]},{"given":"Tan Nhat","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Binh T.","family":"Nguyen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,7]]},"reference":[{"key":"17_CR1","doi-asserted-by":"crossref","unstructured":"Chen, Z., et\u00a0al.: InternVL: scaling up vision foundation models and aligning for generic visual-linguistic tasks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 24185\u201324198 (2024)","DOI":"10.1109\/CVPR52733.2024.02283"},{"key":"17_CR2","doi-asserted-by":"publisher","unstructured":"Cui, Y., Zhao, L., Liang, F., Li, Y., Shao, J.: Democratizing contrastive language-image pre-training: a CLIP benchmark of data, model, and supervision. CoRR abs\/2203.05796 (2022). https:\/\/doi.org\/10.48550\/ARXIV.2203.05796","DOI":"10.48550\/ARXIV.2203.05796"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Doan, K.T., et al.: Vintern-1B: an efficient multimodal large language model for Vietnamese (2024). https:\/\/arxiv.org\/abs\/2408.12480","DOI":"10.1109\/MAPR59823.2023.10288647"},{"key":"17_CR4","unstructured":"Facebook: create-react-app: set up a modern web app by running one command. https:\/\/github.com\/facebook\/create-react-app. Accessed 17 Sept 2025"},{"key":"17_CR5","doi-asserted-by":"publisher","unstructured":"Feng, C., et al.: VQA4CIR: boosting composed image retrieval with visual question answering. In: Walsh, T., Shah, J., Kolter, Z. (eds.) AAAI-25, Sponsored by the Association for the Advancement of Artificial Intelligence, 25 February\u20134 March 2025, Philadelphia, PA, USA, pp. 2942\u20132950. AAAI Press (2025). https:\/\/doi.org\/10.1609\/AAAI.V39I3.32301","DOI":"10.1609\/AAAI.V39I3.32301"},{"key":"17_CR6","doi-asserted-by":"crossref","unstructured":"Kwon, W., et al.: Efficient memory management for large language model serving with pagedattention. In: Proceedings of the ACM SIGOPS 29th Symposium on Operating Systems Principles (2023)","DOI":"10.1145\/3600006.3613165"},{"key":"17_CR7","doi-asserted-by":"publisher","unstructured":"Le, H.M., Luong, V.T., Luong, N.H.: Data augmentation with large language models for Vietnamese abstractive text summarization. In: International Conference on Multimedia Analysis and Pattern Recognition, MAPR 2023, Quy Nhon, Vietnam, 5\u20136 October 2023, pp.\u00a01\u20136. IEEE (2023). https:\/\/doi.org\/10.1109\/MAPR59823.2023.10288906","DOI":"10.1109\/MAPR59823.2023.10288906"},{"key":"17_CR8","doi-asserted-by":"crossref","unstructured":"Le, H.M., et al.: Fustar: Divide and Conquer Query in Video Retrieval System, Information and Communication Technology, vol.\u00a02353, 1st edn. Springer, Singapore (2025)","DOI":"10.1007\/978-981-96-4291-5_8"},{"key":"17_CR9","doi-asserted-by":"publisher","unstructured":"Le, H.M., et al.: Fusionista: fusion of 3-D information of video in retrieval system. In: Ide, I., et al. (eds.) MMM 2025, Part V. LNCS, vol. 15524, pp. 278\u2013285. Springer, Singapore (2025). https:\/\/doi.org\/10.1007\/978-981-96-2074-6_33","DOI":"10.1007\/978-981-96-2074-6_33"},{"key":"17_CR10","unstructured":"Li, B., et al.: LLaVA-OneVision: easy visual task transfer. Trans. Mach. Learn. Res. 2025 (2025). https:\/\/openreview.net\/forum?id=zKv8qULV6n"},{"key":"17_CR11","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: Proceedings of the 40th International Conference on Machine Learning, ICML2023. JMLR.org (2023)"},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Manning, C.D., Raghavan, P., Sch\u00fctze, H.: Introduction to Information Retrieval. Cambridge University Press (2008)","DOI":"10.1017\/CBO9780511809071"},{"key":"17_CR13","unstructured":"Marafioti, A., et al.: SmolVLM: redefining small and efficient multimodal models. arXiv preprint arXiv:2504.05299 (2025)"},{"key":"17_CR14","doi-asserted-by":"publisher","unstructured":"Nasirihaghighi, S., et al.: GynSurg: a comprehensive gynecology laparoscopic surgery dataset. CoRR abs\/2506.11356 (2025). https:\/\/doi.org\/10.48550\/ARXIV.2506.11356","DOI":"10.48550\/ARXIV.2506.11356"},{"key":"17_CR15","unstructured":"OpenAI: GPT-4o system card (2024). https:\/\/arxiv.org\/abs\/2410.21276"},{"key":"17_CR16","unstructured":"Radford, A., et\u00a0al.: Learning transferable visual models from natural language supervision. In: Proceedings of ICML (2021)"},{"key":"17_CR17","unstructured":"Radford, A., Kim, J.W., Xu, T., Brockman, G., McLeavey, C., Sutskever, I.: Robust speech recognition via large-scale weak supervision. In: International Conference on Machine Learning, ICML 2023, 23\u201329 July 2023, Honolulu, Hawaii, USA. Proceedings of Machine Learning Research, vol.\u00a0202, pp. 28492\u201328518. PMLR (2023). https:\/\/proceedings.mlr.press\/v202\/radford23a.html"},{"key":"17_CR18","unstructured":"Rossetto, L., Lokoc, J., Bailer, W., Schoeffmann, K., Awad, G.: V3C\u2013a research video collection. In: Proceedings of the 9th ACM Multimedia Systems Conference, pp. 461\u2013466 (2019)"},{"issue":"3","key":"17_CR19","first-page":"207","volume":"1","author":"K Schoeffmann","year":"2012","unstructured":"Schoeffmann, K., Boeszoermenyi, L., Beesley, P.: The video browser showdown: a live evaluation of interactive video retrieval systems. Int. J. Multimed. Inf. Retrieval 1(3), 207\u2013227 (2012)","journal-title":"Int. J. Multimed. Inf. Retrieval"},{"key":"17_CR20","unstructured":"Sou\u010dek, T., Loko\u010d, J.: Transnet v2: an effective deep network architecture for fast shot transition detection. In: Proceedings of the IEEE International Conference on Image Processing (ICIP), pp. 151\u2013155 (2020)"},{"key":"17_CR21","unstructured":"SYSTRAN: faster-whisper: Transcription with ctranslate2. https:\/\/github.com\/SYSTRAN\/faster-whisper (2024). GitHub repository, MIT License"},{"key":"17_CR22","doi-asserted-by":"publisher","unstructured":"Tan, K., Zhou, Y., Xia, Q., Liu, R., Chen, Y.: Large model based sequential keyframe extraction for video summarization. In: Proceedings of the International Conference on Computing, Machine Learning and Data Science, CMLDS 2024, Singapore, 12\u201314 April 2024, pp. 52:1\u201352:5. ACM (2024). https:\/\/doi.org\/10.1145\/3661725.3661781","DOI":"10.1145\/3661725.3661781"},{"key":"17_CR23","doi-asserted-by":"publisher","unstructured":"Truong, Q., et al.: Marine video kit: a new marine video dataset for content-based analysis and retrieval. In: Dang-Nguyen, D.T., et al. (eds.) MMM 2023, Part I. LNCS, vol. 13833, pp. 539\u2013550. Springer, Cham (2023). https:\/\/doi.org\/10.1007\/978-3-031-27077-2_42","DOI":"10.1007\/978-3-031-27077-2_42"},{"key":"17_CR24","unstructured":"shadcn ui: shadcn-ui\/ui: a set of beautifully-designed, accessible components and a code distribution platform. https:\/\/github.com\/shadcn-ui\/ui. Accessed 17 Sept 2025"},{"key":"17_CR25","unstructured":"Vite contributors: Guide\u2014vite. https:\/\/vite.dev\/guide\/. Accessed 17 Sept 2025"},{"key":"17_CR26","unstructured":"World Wide Web Consortium (W3C): Web content accessibility guidelines (WCAG) 2.1 (2018). https:\/\/www.w3.org\/TR\/WCAG21\/. Accessed 17 Sept 2025"},{"key":"17_CR27","doi-asserted-by":"publisher","unstructured":"Zhang, H., Li, X., Bing, L.: Video-LLaMA: an instruction-tuned audio-visual language model for video understanding. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, EMNLP 2023 - System Demonstrations, Singapore, 6\u201310 December 2023, pp. 543\u2013553. Association for Computational Linguistics (2023). https:\/\/doi.org\/10.18653\/V1\/2023.EMNLP-DEMO.49","DOI":"10.18653\/V1\/2023.EMNLP-DEMO.49"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-6963-2_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,6]],"date-time":"2026-02-06T10:08:28Z","timestamp":1770372508000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-6963-2_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819569625","9789819569632"],"references-count":27,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-6963-2_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"7 February 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Prague","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"29 January 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31 January 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"32","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2026.cz\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}