{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:26:21Z","timestamp":1759883181969,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":35,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032060686","type":"print"},{"value":"9783032060693","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:00:00Z","timestamp":1759881600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,8]],"date-time":"2025-10-08T00:00:00Z","timestamp":1759881600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-06069-3_10","type":"book-chapter","created":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T15:54:29Z","timestamp":1759852469000},"page":"119-131","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Lightweight Semantic Search For Low-Resource Languages: A Case Study In Vietnamese Information Retrieval"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-5684-1787","authenticated-orcid":false,"given":"Chi Mai","family":"Nguyen","sequence":"first","affiliation":[]},{"given":"Hoang Anh Phi","family":"Tran","sequence":"additional","affiliation":[]},{"given":"Phat Trien","family":"Thai","sequence":"additional","affiliation":[]},{"given":"Huu Doanh","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"Van Tuan","family":"Nguyen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,10,8]]},"reference":[{"key":"10_CR1","doi-asserted-by":"crossref","unstructured":"Chen, J., Xiao, S., Zhang, P., Luo, K., Lian, D., Liu, Z.: M3-embedding: multi-linguality, multi-functionality, multi-granularity text embeddings through self-knowledge distillation. In: Findings of the Association for Computational Linguistics ACL 2024, pp. 2318\u20132335 (2024)","DOI":"10.18653\/v1\/2024.findings-acl.137"},{"issue":"3","key":"10_CR2","first-page":"301","volume":"24","author":"TTT Do","year":"2021","unstructured":"Do, T.T.T., Nguyen, D.T.: A computational semantic information retrieval model for vietnamese texts. Int. J. Comput. Sci. Eng. 24(3), 301\u2013311 (2021)","journal-title":"Int. J. Comput. Sci. Eng."},{"key":"10_CR3","unstructured":"Douze, M., et al.: The faiss library. arXiv preprint arXiv:2401.08281 (2024)"},{"key":"10_CR4","unstructured":"Duc, N.Q., Son, L.H., Nhan, N.D., Minh, N.D.N., Huong, L.T., Sang, D.V.: Towards comprehensive vietnamese retrieval-augmented generation and large language models. arXiv preprint arXiv:2403.01616 (2024)"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Gao, C., Hu, H., Hu, P., Chen, J., Li, J., Huang, S.: Multilingual pretraining and instruction tuning improve cross-lingual knowledge alignment, but only shallowly. arXiv preprint arXiv:2404.04659 (2024)","DOI":"10.18653\/v1\/2024.naacl-long.339"},{"key":"10_CR6","unstructured":"Hasan, M.A., Tarannum, P., Dey, K., Razzak, I., Naseem, U.: Do large language models speak all languages equally? a comparative study in low-resource settings. arXiv preprint arXiv:2408.02237 (2024)"},{"key":"10_CR7","doi-asserted-by":"crossref","unstructured":"Hedderich, M.A., Lange, L., Adel, H., Str\u00f6tgen, J., Klakow, D.: A survey on recent approaches for natural language processing in low-resource scenarios. arXiv preprint arXiv:2010.12309 (2020)","DOI":"10.18653\/v1\/2021.naacl-main.201"},{"issue":"9","key":"10_CR8","doi-asserted-by":"publisher","first-page":"2159","DOI":"10.1093\/jamia\/ocae014","volume":"31","author":"W Hersh","year":"2024","unstructured":"Hersh, W.: Search still matters: information retrieval in the era of generative ai. J. Am. Med. Inform. Assoc. 31(9), 2159\u20132161 (2024)","journal-title":"J. Am. Med. Inform. Assoc."},{"key":"10_CR9","unstructured":"Ji, Z., et al.: Efficient document ranking with learnable late interactions. arXiv preprint arXiv:2406.17968 (2024)"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Karpukhin, V., et al.: Dense passage retrieval for open-domain question answering. In: EMNLP (1), pp. 6769\u20136781 (2020)","DOI":"10.18653\/v1\/2020.emnlp-main.550"},{"key":"10_CR11","doi-asserted-by":"crossref","unstructured":"Khattab, O., Zaharia, M.: Colbert: efficient and effective passage search via contextualized late interaction over bert. In: Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 39\u201348 (2020)","DOI":"10.1145\/3397271.3401075"},{"key":"10_CR12","doi-asserted-by":"crossref","unstructured":"Kong, W., et al.: Multi-aspect dense retrieval. In: Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining, pp. 3178\u20133186 (2022)","DOI":"10.1145\/3534678.3539137"},{"key":"10_CR13","unstructured":"Le, K., Nguyen, H., Le\u00a0Thanh, T., Nguyen, M.: Vimqa: A vietnamese dataset for advanced reasoning and explainable multi-hop question answering. In: Proceedings of the Thirteenth Language Resources and Evaluation Conference, pp. 6521\u20136529 (2022)"},{"key":"10_CR14","first-page":"15384","volume":"36","author":"J Lee","year":"2023","unstructured":"Lee, J., et al.: Rethinking the role of token retrieval in multi-vector retrieval. Adv. Neural. Inf. Process. Syst. 36, 15384\u201315405 (2023)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"10_CR15","doi-asserted-by":"crossref","unstructured":"Li, X., Jin, J., Zhou, Y., Zhang, Y., Zhang, P., Zhu, Y., Dou, Z.: From matching to generation: a survey on generative information retrieval. ACM Trans. Inf. Syst. (2024)","DOI":"10.1145\/3722552"},{"key":"10_CR16","unstructured":"Louis, A., Saxena, V., van Dijck, G., Spanakis, G.: Colbert-xm: a modular multi-vector representation model for zero-shot multilingual information retrieval. arXiv preprint arXiv:2402.15059 (2024)"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Macdonald, C., Tonellotto, N.: On approximate nearest neighbour selection for multi-stage dense retrieval. In: Proceedings of the 30th ACM International Conference on Information & Knowledge Management, pp. 3318\u20133322 (2021)","DOI":"10.1145\/3459637.3482156"},{"issue":"4","key":"10_CR18","doi-asserted-by":"publisher","first-page":"824","DOI":"10.1109\/TPAMI.2018.2889473","volume":"42","author":"YA Malkov","year":"2018","unstructured":"Malkov, Y.A., Yashunin, D.A.: Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE Trans. Pattern Anal. Mach. Intell. 42(4), 824\u2013836 (2018)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Nguyen, N.H., Nguyen, D.T., Nguyen, N.L.T.: Vietnamese words are not constructed from syllables: rethinking the role of word segmentation in natural language processing for vietnamese texts. In: Proceedings of the AAAI Conference on Artificial Intelligence, pp. 24069\u201324077 (2025)","DOI":"10.1609\/aaai.v39i22.34581"},{"key":"10_CR20","doi-asserted-by":"publisher","first-page":"109","DOI":"10.54939\/1859-1043.j.mst.99.2024.109-118","volume":"99","author":"XB Nguyen","year":"2024","unstructured":"Nguyen, X.B., Luu, V.S., Nguyen, D.V., Luong, Q.L., Dang, D.T.: Enhancing retrieval performance of embedding models via fine-tuning on synthetic data in rag chatbot for vietnamese military science domain. J. Military Sci. Technol. 99, 109\u2013118 (2024)","journal-title":"J. Military Sci. Technol."},{"key":"10_CR21","unstructured":"Nguyen\u00a0Ba, T., Pham\u00a0Quang, T., Tran\u00a0Van, T., et\u00a0al.: Vietnamese legal information retrieval in question-answering system. arXiv e-prints pp. arXiv\u20132409 (2024)"},{"key":"10_CR22","unstructured":"Pham, N.M., Nguyen, H.T., Do, T.H.: Multi-stage information retrieval for vietnamese legal texts. arXiv preprint arXiv:2209.14494 (2022)"},{"key":"10_CR23","doi-asserted-by":"crossref","unstructured":"Pham, Q.H., et al.: Towards vietnamese question and answer generation: an empirical study. ACM Trans. Asian Low-Resource Lang. Inf. Process. 23(9), 1\u201328 (2024)","DOI":"10.1145\/3675781"},{"key":"10_CR24","doi-asserted-by":"crossref","unstructured":"Phan, Q.L., Doan, T.H.P., Le, N.H., Tran, N.B.D., Huynh, T.N.: Vietnamese sentence paraphrase identification using sentence-bert and phobert. In: International Conference on Intelligence of Things, pp. 416\u2013423. Springer (2022)","DOI":"10.1007\/978-3-031-15063-0_40"},{"key":"10_CR25","doi-asserted-by":"crossref","unstructured":"Reimers, N., Gurevych, I.: Sentence-bert: sentence embeddings using siamese bert-networks. arXiv preprint arXiv:1908.10084 (2019)","DOI":"10.18653\/v1\/D19-1410"},{"key":"10_CR26","doi-asserted-by":"crossref","unstructured":"Robertson, S., Zaragoza, H., et\u00a0al.: The probabilistic relevance framework: Bm25 and beyond. Found. Trends\u00ae Inf. Retrieval 3(4), 333\u2013389 (2009)","DOI":"10.1561\/1500000019"},{"key":"10_CR27","unstructured":"Shen, X., Vakulenko, S., Del\u00a0Tredici, M., Barlacchi, G., Byrne, B., de\u00a0Gispert, A.: Low-resource dense retrieval for open-domain question answering: a comprehensive survey. arXiv preprint arXiv:2208.03197 (2022)"},{"key":"10_CR28","unstructured":"Shu, P., et\u00a0al.: Transcending language boundaries: harnessing llms for low-resource language translation. arXiv preprint arXiv:2411.11295 (2024)"},{"key":"10_CR29","unstructured":"Tien, S.P., Doan, H.N., Dai, A.N., Viet, S.D.: Improving vietnamese legal document retrieval using synthetic data. arXiv preprint arXiv:2412.00657 (2024)"},{"key":"10_CR30","unstructured":"To, L.T., et al.: Evaluating large language model capability in vietnamese fact-checking data generation. arXiv preprint arXiv:2411.05641 (2024)"},{"issue":"7","key":"10_CR31","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/3648471","volume":"56","author":"J Wang","year":"2024","unstructured":"Wang, J., et al.: Utilizing bert for information retrieval: survey, applications, resources, and challenges. ACM Comput. Surv. 56(7), 1\u201333 (2024)","journal-title":"ACM Comput. Surv."},{"key":"10_CR32","unstructured":"Wang, L., Yang, N., Huang, X., Yang, L., Majumder, R., Wei, F.: Multilingual e5 text embeddings: a technical report. arXiv preprint arXiv:2402.05672 (2024)"},{"key":"10_CR33","first-page":"5776","volume":"33","author":"W Wang","year":"2020","unstructured":"Wang, W., Wei, F., Dong, L., Bao, H., Yang, N., Zhou, M.: Minilm: Deep self-attention distillation for task-agnostic compression of pre-trained transformers. Adv. Neural. Inf. Process. Syst. 33, 5776\u20135788 (2020)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"10_CR34","doi-asserted-by":"crossref","unstructured":"Zhan, J., Mao, J., Liu, Y., Guo, J., Zhang, M., Ma, S.: Optimizing dense retrieval model training with hard negatives. In: Proceedings of the 44th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 1503\u20131512 (2021)","DOI":"10.1145\/3404835.3462880"},{"key":"10_CR35","unstructured":"Zhong, T., et\u00a0al.: Opportunities and challenges of large language models for low-resource languages in humanities research. arXiv preprint arXiv:2412.04497 (2024)"}],"container-title":["Lecture Notes in Computer Science","Similarity Search and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-06069-3_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,7]],"date-time":"2025-10-07T15:54:49Z","timestamp":1759852489000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-06069-3_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,8]]},"ISBN":["9783032060686","9783032060693"],"references-count":35,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-06069-3_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,8]]},"assertion":[{"value":"8 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors declare that they have no competing interests relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"SISAP","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Similarity Search and Applications","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Reykjavik","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Iceland","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"3 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"sisap2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.sisap.org\/2025\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}