{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T06:57:18Z","timestamp":1781161038344,"version":"3.54.1"},"publisher-location":"Singapore","reference-count":19,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819200672","type":"print"},{"value":"9789819200689","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-92-0068-9_32","type":"book-chapter","created":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T06:09:58Z","timestamp":1781158198000},"page":"477-491","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Advancing Vietnamese Visual Question Answering via\u00a0Adapter-Augmented Cross-Lingual Embedding Integration"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-2777-9112","authenticated-orcid":false,"given":"Minh","family":"Bui","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-5790-4462","authenticated-orcid":false,"given":"Phuc Do","family":"The","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9948-1048","authenticated-orcid":false,"given":"Huy Tien","family":"Nguyen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9900-7047","authenticated-orcid":false,"given":"Tung","family":"Le","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2026,6,1]]},"reference":[{"key":"32_CR1","unstructured":"Dosovitskiy, A., et al: An image is worth 16x16 words: Transformers for image recognition at scale. In: 9th International Conference on Learning Representations, ICLR 2021, Virtual Event, Austria, May 3-7, 2021 (2021)"},{"key":"32_CR2","unstructured":"Hu, E.J., et al.: Lora: Low-rank adaptation of large language models (2021). https:\/\/arxiv.org\/abs\/2106.09685"},{"key":"32_CR3","unstructured":"Kim, W., Son, B., Kim, I.: Vilt: Vision-and-language transformer without convolution or region supervision. In: International Conference on Machine Learning (2021)"},{"key":"32_CR4","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: Blip-2: Bootstrapping language-image pre-training with frozen image encoders and large language models (2023). https:\/\/arxiv.org\/abs\/2301.12597"},{"key":"32_CR5","unstructured":"Li, J., Li, D., Xiong, C., Hoi, S.: Blip: Bootstrapping language-image pre-training for unified vision-language understanding and generation (2022). https:\/\/arxiv.org\/abs\/2201.12086"},{"key":"32_CR6","unstructured":"Microsoft: unilm: large-scale self-supervised pre-training across tasks, languages, and modalities. GitHub repository (2025). https:\/\/github.com\/microsoft\/unilm, accessed: 2025"},{"key":"32_CR7","unstructured":"Nghia Hieu\u00a0Nguyen, D.T.V.: Openvivqa - open-source Vietnamese visual question answering. GitHub repository (2023). https:\/\/github.com\/nghiangh\/OpenViVQA. Accessed: 2025"},{"key":"32_CR8","doi-asserted-by":"publisher","first-page":"172","DOI":"10.1007\/978-3-031-26431-3_14","volume-title":"Image and Video Technology: 10th Pacific-Rim Symposium, PSIVT 2022, Virtual Event, November 12\u201314, 2022, Proceedings","author":"AD Nguyen","year":"2023","unstructured":"Nguyen, A.D., Le, T., Nguyen, H.T.: Combining multi-vision embedding in contextual attention for Vietnamese visual question answering. In: Wang, H., Lin, W., Manoranjan, P., Xiao, G., Chan, K.L., Wang, X., Ping, G., Jiang, H. (eds.) Image and Video Technology: 10th Pacific-Rim Symposium, PSIVT 2022, Virtual Event, November 12\u201314, 2022, Proceedings, pp. 172\u2013185. Springer International Publishing, Cham (2023)"},{"key":"32_CR9","doi-asserted-by":"publisher","first-page":"1037","DOI":"10.18653\/v1\/2020.findings-emnlp.92","volume-title":"Findings of the Association for Computational Linguistics: EMNLP 2020","author":"DQ Nguyen","year":"2020","unstructured":"Nguyen, D.Q., Tuan Nguyen, A.: PhoBERT: Pre-trained language models for Vietnamese. In: Cohn, T., He, Y., Liu, Y. (eds.) Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 1037\u20131042. Association for Computational Linguistics, Online (Nov (2020)"},{"key":"32_CR10","doi-asserted-by":"crossref","unstructured":"Nguyen, N.H., Nguyen, K.V.: Pat: Parallel attention transformer for visual question answering in vietnamese. In: 2023 International Conference on Multimedia Analysis and Pattern Recognition (MAPR), pp.\u00a01\u20136 (2023)","DOI":"10.1109\/MAPR59823.2023.10288833"},{"key":"32_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2024.109474","volume":"119","author":"NS Nguyen","year":"2024","unstructured":"Nguyen, N.S., Nguyen, V.S., Le, T.: Advancing Vietnamese visual question answering with transformer and convolutional integration. Comput. Electr. Eng. 119, 109474 (2024)","journal-title":"Comput. Electr. Eng."},{"key":"32_CR12","unstructured":"Nguyen Luong\u00a0Tran, Duong Minh\u00a0Le, D.Q.N.: Bartpho: Pre-trained sequence-to-sequence models for Vietnamese. In: Proceedings of the 23rd Annual Conference of the International Speech Communication Association (2022)"},{"key":"32_CR13","unstructured":"Nguyen-Tran, D.M., Le, T., Le\u00a0Nguyen, M., Nguyen, H.T.: Bi-directional cross-attention network on Vietnamese visual question answering. In: Proceedings of the 36th Pacific Asia Conference on Language, Information and Computation, pp. 834\u2013841 (2022)"},{"key":"32_CR14","unstructured":"Peng, Z., Dong, L., Bao, H., Ye, Q., Wei, F.: Beit v2: masked image modeling with vector-quantized visual tokenizers. ArXiv abs\/2208.06366 (2022). https:\/\/api.semanticscholar.org\/CorpusID:251554649"},{"key":"32_CR15","doi-asserted-by":"publisher","DOI":"10.1016\/j.compeleceng.2023.108641","volume":"107","author":"W Shi","year":"2023","unstructured":"Shi, W., Wang, H., Lou, X.: Multi-modal graph reasoning for structured video text extraction. Comput. Electr. Eng. 107, 108641 (2023)","journal-title":"Comput. Electr. Eng."},{"key":"32_CR16","unstructured":"Tran, K.Q., Nguyen, A.T., Le, A.T.H., Nguyen, K.V.: Vivqa: Vietnamese visual question answering. In: Proceedings of the 35th Pacific Asia Conference on Language, Information and Computation, pp. 546\u2013554. Association for Computational Lingustics, Shanghai, China (2021)"},{"key":"32_CR17","doi-asserted-by":"crossref","unstructured":"Tran, K.V., Nguyen, K.V., Nguyen, N.L.T.: Bartphobeit: pre-trained sequence-to-sequence and image transformers models for Vietnamese visual question answering. In: 2023 International Conference on Multimedia Analysis and Pattern Recognition (MAPR), pp.\u00a01\u20136 (2023)","DOI":"10.1109\/MAPR59823.2023.10288874"},{"key":"32_CR18","doi-asserted-by":"crossref","unstructured":"Tuan\u00a0Nguyen, A., Dao, M.H., Nguyen, D.Q.: A pilot study of text-to-SQL semantic parsing for Vietnamese. In: Cohn, T., He, Y., Liu, Y. (eds.) Findings of the Association for Computational Linguistics: EMNLP 2020, pp. 4079\u20134085 (Nov 2020)","DOI":"10.18653\/v1\/2020.findings-emnlp.364"},{"key":"32_CR19","doi-asserted-by":"crossref","unstructured":"Wang, W., et al.: Image as a foreign language: Beit pretraining for vision and vision-language tasks. 2023 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR), pp. 19175\u201319186 (2023)","DOI":"10.1109\/CVPR52729.2023.01838"}],"container-title":["Communications in Computer and Information Science","Recent Challenges in Intelligent information and Database Systems"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-92-0068-9_32","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,6,11]],"date-time":"2026-06-11T06:10:02Z","timestamp":1781158202000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-92-0068-9_32"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819200672","9789819200689"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-981-92-0068-9_32","relation":{},"ISSN":["1865-0929","1865-0937"],"issn-type":[{"value":"1865-0929","type":"print"},{"value":"1865-0937","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"1 June 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"ACIIDS","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asian Conference on Intelligent Information and Database Systems","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Kaohsiung","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Taiwan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2026","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 April 2026","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 April 2026","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aciids2026","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aciids.pwr.edu.pl\/2026\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}