{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T02:31:42Z","timestamp":1776306702978,"version":"3.50.1"},"reference-count":151,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T00:00:00Z","timestamp":1769472000000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by\/4.0\/"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Information Fusion"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.inffus.2026.104189","type":"journal-article","created":{"date-parts":[[2026,1,28]],"date-time":"2026-01-28T00:39:35Z","timestamp":1769560775000},"page":"104189","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":3,"special_numbering":"C","title":["Large multimodal models for low-resource languages: A survey"],"prefix":"10.1016","volume":"131","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-7558-1263","authenticated-orcid":false,"given":"Marian","family":"Lupa\u015fcu","sequence":"first","affiliation":[]},{"given":"Ana-Cristina","family":"Rogoz","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9531-4576","authenticated-orcid":false,"given":"Mihai","family":"Sorin Stupariu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9301-1950","authenticated-orcid":false,"given":"Radu","family":"Tudor Ionescu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.inffus.2026.104189_sbref0001","series-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems (NeuIPS)","first-page":"72096","article-title":"Language is not all you need: aligning perception with language models","author":"Huang","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_sbref0002","series-title":"Proceedings of the 40th International Conference on Machine Learning (ICML)","first-page":"8469","article-title":"PaLM-E: an embodied multimodal language model","author":"Driess","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_bib0003","unstructured":"S. Khanna, X. Li, Invisible languages of the LLM universe, (2025). https:\/\/arxiv.org\/abs\/2510.11557."},{"key":"10.1016\/j.inffus.2026.104189_bib0004","doi-asserted-by":"crossref","first-page":"43","DOI":"10.5334\/dsj-2020-043","article-title":"The CARE principles for indigenous data governance","volume":"19","author":"Carroll","year":"2020","journal-title":"Data Sci. J."},{"key":"10.1016\/j.inffus.2026.104189_bib0005","series-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (ACL)","first-page":"7817","article-title":"Local languages, third spaces, and other high-resource scenarios","author":"Bird","year":"2022"},{"issue":"13","key":"10.1016\/j.inffus.2026.104189_bib0006","doi-asserted-by":"crossref","first-page":"3521","DOI":"10.1073\/pnas.1611835114","article-title":"Overcoming catastrophic forgetting in neural networks","volume":"114","author":"Kirkpatrick","year":"2017","journal-title":"Proc. Natl. Acad. Sci."},{"key":"10.1016\/j.inffus.2026.104189_bib0007","series-title":"Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)","first-page":"4171","article-title":"BERT: pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2019"},{"key":"10.1016\/j.inffus.2026.104189_bib0008","series-title":"Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing (EMNLP)","first-page":"1371","article-title":"Geographic citation gaps in NLP research","author":"Rungta","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_bib0009","series-title":"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (ACL)","first-page":"6282","article-title":"The state and fate of linguistic diversity and inclusion in the NLP world","author":"Joshi","year":"2020"},{"key":"10.1016\/j.inffus.2026.104189_bib0010","series-title":"Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (AACL-IJCNLP)","first-page":"823","article-title":"Some languages are more equal than others: probing deeper into the linguistic disparity in the NLP world","author":"Ranathunga","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_bib0011","unstructured":"A. Caines, M. Rei, The Geographic Diversity of NLP Conferences, 2019, (https:\/\/www.marekrei.com\/blog\/geographic-diversity-of-nlp-conferences\/) Accessed: December 2025."},{"key":"10.1016\/j.inffus.2026.104189_bib0012","doi-asserted-by":"crossref","first-page":"72","DOI":"10.1145\/3447735","article-title":"A panoramic survey of natural language processing in the arab world","volume":"64","author":"Darwish","year":"2020","journal-title":"Commun. ACM"},{"key":"10.1016\/j.inffus.2026.104189_bib0013","unstructured":"O. Kanishcheva, CLARIN Knowledge Centre for Ukrainian NLP and Corpora (UkrNLP-Corpora), 2023, (https:\/\/www.clarin.eu\/blog\/introduction-clarin-knowledge-centre-ukrainian-nlp-and-corpora-ukrnlp-corpora) Accessed: December 2025."},{"key":"10.1016\/j.inffus.2026.104189_bib0014","unstructured":"M. Romanyshyn (Ed.), Proceedings of the fourth Ukrainian natural language processing workshop (UNLP), Association for Computational Linguistics, 2025. 10.18653\/v1\/2025.unlp-1.0."},{"key":"10.1016\/j.inffus.2026.104189_bib0015","series-title":"Proceedings of the Fourth Ukrainian Natural Language Processing Workshop (UNLP)","first-page":"194","article-title":"Hidden persuasion: detecting manipulative narratives on social media during the 2022\u202frussian invasion of Ukraine","author":"Akhynko","year":"2025"},{"key":"10.1016\/j.inffus.2026.104189_bib0016","unstructured":"M. Li, Top 50+ Chinese AI Investment Statistics [2025], 2025, (https:\/\/www.secondtalent.com\/resources\/chinese-ai-investment-statistics\/) Accessed: December 2025."},{"issue":"6731","key":"10.1016\/j.inffus.2026.104189_sbref0017","doi-asserted-by":"crossref","first-page":"238","DOI":"10.1126\/science.adv9836","article-title":"Chinese firm\u2019s faster, cheaper AI language model makes a splash","volume":"387","author":"Normile","year":"2025","journal-title":"Science"},{"key":"10.1016\/j.inffus.2026.104189_bib0018","unstructured":"J. Gu, X. Jiang, Z. Shi, H. Tan, X. Zhai, C. Xu, W. Li, Y. Shen, S. Ma, H. Liu, Y. Wang, J. Guo, A survey on LLM-as-a-judge, (2024). https:\/\/arxiv.org\/abs\/2411.15594."},{"issue":"11","key":"10.1016\/j.inffus.2026.104189_sbref0019","doi-asserted-by":"crossref","DOI":"10.1016\/j.patter.2021.100336","article-title":"Data and its (dis)contents: a survey of dataset development and use in machine learning research","volume":"2","author":"Paullada","year":"2021","journal-title":"Patterns"},{"key":"10.1016\/j.inffus.2026.104189_bib0020","series-title":"Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing (EMNLP)","first-page":"10215","article-title":"XTREME-R: towards more challenging and nuanced multilingual evaluation","author":"Ruder","year":"2021"},{"key":"10.1016\/j.inffus.2026.104189_bib0021","unstructured":"W.X. Zhao, K. Zhou, J. Li, T. Tang, X. Wang, Y. Hou, Y. Min, B. Zhang, J. Zhang, Z. Dong, et al., A survey of large language models, (2023). https:\/\/arxiv.org\/abs\/2303.18223."},{"key":"10.1016\/j.inffus.2026.104189_bib0022","unstructured":"W. Zhu, Y. Lv, Q. Dong, F. Yuan, J. Xu, S. Huang, L. Kong, J. Chen, L. Li, Extrapolating Large Language Models to Non-English by Aligning Languages, (2023). https:\/\/arxiv.org\/abs\/2308.04948."},{"issue":"3\u20134","key":"10.1016\/j.inffus.2026.104189_sbref0023","doi-asserted-by":"crossref","first-page":"163","DOI":"10.1561\/0600000105","article-title":"Vision-language pre-training: basics, recent advances, and future trends","volume":"14","author":"Gan","year":"2022","journal-title":"Found. Trends Compu. Graph. Vision"},{"key":"10.1016\/j.inffus.2026.104189_bib0024","doi-asserted-by":"crossref","first-page":"447","DOI":"10.1007\/s11633-022-1410-8","article-title":"Large-scale multi-modal pre-trained models: a comprehensive survey","volume":"20","author":"Wang","year":"2023","journal-title":"Mach. Intell. Res."},{"key":"10.1016\/j.inffus.2026.104189_sbref0025","series-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)","first-page":"1587","article-title":"A survey of state of the art large vision language models: benchmark evaluations and challenges","author":"Li","year":"2025"},{"issue":"12","key":"10.1016\/j.inffus.2026.104189_sbref0026","doi-asserted-by":"crossref","DOI":"10.1093\/nsr\/nwae403","article-title":"A survey on multimodal large language models","volume":"11","author":"Yin","year":"2024","journal-title":"Natl. Sci. Rev."},{"key":"10.1016\/j.inffus.2026.104189_bib0027","unstructured":"J. Xie, Z. Chen, R. Zhang, X. Wan, G. Li, Large multimodal agents: a survey, (2024). https:\/\/arxiv.org\/abs\/2402.15116."},{"key":"10.1016\/j.inffus.2026.104189_bib0028","unstructured":"M. Xu, W. Yin, D. Cai, R. Yi, D. Xu, Q. Wang, B. Wu, Y. Zhao, C. Yang, S. Wang, et al., A survey of resource-efficient LLM and multimodal foundation models, (2024). https:\/\/arxiv.org\/abs\/2401.08092."},{"key":"10.1016\/j.inffus.2026.104189_sbref0029","series-title":"Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics: Tutorial Abstracts (EACL)","first-page":"27","article-title":"LLMs for low resource languages in multilingual, multimodal and dialectal settings","author":"Alam","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_bib0030","unstructured":"S. Mu, S. Lin, A comprehensive survey of mixture-of-experts: algorithms, theory, and applications, (2025). 10.48550\/ARXIV.2503.07137."},{"issue":"1","key":"10.1016\/j.inffus.2026.104189_sbref0031","doi-asserted-by":"crossref","first-page":"39","DOI":"10.18178\/joig.6.1.39-43","article-title":"Multimodal sentiment analysis of arabic videos","volume":"6","author":"Najadat","year":"2018","journal-title":"J. Image Graph."},{"key":"10.1016\/j.inffus.2026.104189_bib0032","unstructured":"B.R. Chakravarthi, P. Jishnu, B. Premjith, K.P. Soman, R. Ponnusamy, P.K. Kumaresan, K.P. Thamburaj, J.P. McCrae, DravidianMultiModality: a dataset for multi-modal sentiment analysis in Tamil and Malayalam, (2021). https:\/\/arxiv.org\/abs\/2106.04853."},{"key":"10.1016\/j.inffus.2026.104189_bib0033","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3703445","article-title":"Multimodal sentiment analysis for the malay language: new corpus using CNN-based framework","volume":"24","author":"Taylor","year":"2024","journal-title":"ACM Trans. Asian Low-Resour. Lang. Inf. Process."},{"key":"10.1016\/j.inffus.2026.104189_sbref0034","series-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING)","first-page":"7270","article-title":"FFSTC: Fongbe to French speech translation corpus","author":"Kponou","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_bib0035","series-title":"Proceedings of Fourth International Conference on Intelligent Data Science Technologies and Applications (IDSTA)","first-page":"126","article-title":"Towards arabic multimodal dataset for sentiment analysis","author":"Haouhat","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_sbref0036","series-title":"Proceedings of the Thirteenth Language Resources and Evaluation Conference (LREC)","first-page":"1542","article-title":"MemoSen: a multimodal dataset for sentiment analysis of memes","author":"Hossain","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_bib0037","series-title":"Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12Th International Joint Conference on Natural Language Processing: Student Research Workshop (AACL-IJCNLP)","first-page":"32","article-title":"MUTE: a multimodal dataset for detecting hateful memes","author":"Hossain","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_bib0038","unstructured":"V. P\u0103i\u015f, S. Ni\u0163\u0103, A.-I. Jerpelea, L. Pan\u0103, E. Curea, RoMemes: a multimodal meme corpus for the Romanian language, (2024). https:\/\/arxiv.org\/abs\/2410.15497."},{"key":"10.1016\/j.inffus.2026.104189_bib0039","series-title":"Proceedings of IEEE 17th International Conference on Automatic Face and Gesture Recognition (FG)","first-page":"1","article-title":"Arabsign: a multi-modality dataset and benchmark for continuous arabic sign language recognition","author":"Luqman","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_bib0040","series-title":"Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (ACL)","first-page":"2062","article-title":"BIG-C: a multimodal multi-purpose dataset for bemba","author":"Sikasote","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_sbref0041","series-title":"Proceedings of the 18th International Conference on Natural Language Processing (ICON)","first-page":"54","article-title":"An experiment on speech-to-text translation systems for manipuri to english on low resource setting","author":"Sanayai Meetei","year":"2021"},{"key":"10.1016\/j.inffus.2026.104189_bib0042","series-title":"Proceedings of the First Workshop of Evaluation of Multi-Modal Generation (EvalMG)","first-page":"52","article-title":"Persian in a court: benchmarking VLMs in persian multi-modal tasks","author":"Farsi","year":"2025"},{"key":"10.1016\/j.inffus.2026.104189_bib0043","series-title":"Proceedings of the 9th International Conference on Frontiers in Intelligent Computing: Theory and Applications (FICTA)","first-page":"63","article-title":"Bengali visual genome: a multimodal dataset for machine translation and image captioning","author":"Arghyadeep","year":"2021"},{"issue":"1","key":"10.1016\/j.inffus.2026.104189_bib0044","doi-asserted-by":"crossref","first-page":"32","DOI":"10.1007\/s11263-016-0981-7","article-title":"Visual genome: connecting language and vision using crowdsourced dense image annotations","volume":"123","author":"Krishna","year":"2017","journal-title":"Int. J. Comput. Vis."},{"key":"10.1016\/j.inffus.2026.104189_sbref0045","series-title":"Proceedings of the Thirteenth Language Resources and Evaluation Conference (LREC)","first-page":"6471","article-title":"Hausa visual genome: a dataset for multi-modal english to hausa machine translation","author":"Abdulmumin","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_bib0046","series-title":"Findings of the Association for Computational Linguistics (ACL)","first-page":"10162","article-title":"HaVQA: a dataset for visual question answering and multimodal research in hausa language","author":"Parida","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_sbref0047","series-title":"Proceedings of the First Workshop on Natural Language Processing for Indo-Aryan and Dravidian Languages (IndoNLP)","first-page":"58","article-title":"OVQA: a dataset for visual question answering and multimodal research in odia language","author":"Parida","year":"2025"},{"key":"10.1016\/j.inffus.2026.104189_sbref0048","series-title":"Proceedings of Conference of the International Speech Communication Association (INTERSPEECH)","first-page":"4064","article-title":"MuAViC: a multilingual audio-visual corpus for robust speech recognition and robust speech-to-text translation","author":"Anwar","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_bib0049","series-title":"Proceedings of the Second Ukrainian Natural Language Processing Workshop (UNLP)","first-page":"54","article-title":"Extension multi30K: multimodal dataset for integrated vision and language research in ukrainian","author":"Saichyshyna","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_bib0050","series-title":"Proceedings of the 5th Workshop on Vision and Language (VL\u201916)","first-page":"70","article-title":"Multi30K: multilingual english-german image descriptions","author":"Elliott","year":"2016"},{"key":"10.1016\/j.inffus.2026.104189_bib0051","series-title":"Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing (EMNLP)","first-page":"5155","article-title":"SEACrowd: a multilingual multimodal data hub and benchmark suite for southeast asian languages","author":"Lovenia","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_sbref0052","doi-asserted-by":"crossref","first-page":"950","DOI":"10.1162\/tacl_a_00682","article-title":"CreoleVal: multilingual multitask benchmarks for creoles","volume":"12","author":"Lent","year":"2024","journal-title":"Trans. Assoc. Comput. Linguist."},{"key":"10.1016\/j.inffus.2026.104189_sbref0053","series-title":"Proceedings of the Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo)","first-page":"33","article-title":"Multimodal neural machine translation for low-resource language pairs using synthetic data","author":"Dutta Chowdhury","year":"2018"},{"key":"10.1016\/j.inffus.2026.104189_sbref0054","doi-asserted-by":"crossref","first-page":"67","DOI":"10.1162\/tacl_a_00166","article-title":"From image descriptions to visual denotations: new similarity metrics for semantic inference over event descriptions","volume":"2","author":"Young","year":"2014","journal-title":"Trans. Assoc. Computat. Linguist."},{"key":"10.1016\/j.inffus.2026.104189_sbref0055","series-title":"Proceedings of the First Workshop on Multimodal Machine Translation for Low Resource Languages (MMTLRL)","first-page":"20","article-title":"Low resource multimodal neural machine translation of english-hindi in news domain","author":"Meetei","year":"2021"},{"key":"10.1016\/j.inffus.2026.104189_sbref0056","series-title":"Proceedings of the Ninth Conference on Machine Translation (WMT)","first-page":"810","article-title":"DCU ADAPT at WMT24: english to low-resource multi-modal translation task","author":"Haq","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_bib0057","series-title":"Proceedings of the Second Arabic Natural Language Processing Conference (ArabicNLP)","first-page":"320","article-title":"Dallah: a dialect-Aware multimodal large language model for arabic","author":"Alwajih","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_sbref0058","series-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)","first-page":"2596","article-title":"Adapting grounded visual question answering models to low resource languages","author":"Wang","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_bib0059","series-title":"Proceedings of the 30th ACM International Conference on Multimedia (ACMMM)","first-page":"422","article-title":"Cross-lingual cross-modal retrieval with noise-robust learning","author":"Wang","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_sbref0060","series-title":"Proceedings of the 10th Workshop on Asian Translation (WAT)","first-page":"41","article-title":"BITS-P at WAT 2023: improving indic language multimodal translation by image augmentation using diffusion models","author":"Dash","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_bib0061","unstructured":"K.T. Doan, B.G. Huynh, D.T. Hoang, T.D. Pham, N.H. Pham, Q. Nguyen, B.Q. Vo, S.N. Hoang, Vintern-1B: an efficient multimodal large language model for Vietnamese, (2024). https:\/\/arxiv.org\/abs\/2408.12480."},{"key":"10.1016\/j.inffus.2026.104189_sbref0062","series-title":"Proceedings of the 34th Conference on Computational Linguistics and Speech Processing (ROCLING 2022)","first-page":"263","article-title":"Image caption generation for low-resource assamese language","author":"Nath","year":"2022"},{"issue":"20","key":"10.1016\/j.inffus.2026.104189_sbref0063","doi-asserted-by":"crossref","first-page":"9533","DOI":"10.3390\/app14209533","article-title":"Multimodal seed data augmentation for low-resource audio latin cuengh language","volume":"14","author":"Jiang","year":"2024","journal-title":"Appl. Sci."},{"key":"10.1016\/j.inffus.2026.104189_bib0064","unstructured":"X. Qu, M. Song, W. Wei, J. Dong, Y. Cheng, Mitigating multilingual hallucination in large vision-language models, (2024). https:\/\/arxiv.org\/abs\/2408.00550."},{"key":"10.1016\/j.inffus.2026.104189_sbref0065","series-title":"Proceedings of the 37th International Conference on Neural Information Processing Systems (NeurIPS)","first-page":"53728","article-title":"Direct preference optimization: your language model is secretly a reward model","volume":"36","author":"Rafailov","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_bib0066","unstructured":"M. Shukor, L. Bethune, D. Busbridge, D. Grangier, E. Fini, A. El-Nouby, P. Ablin, Scaling laws for optimal data mixtures, (2025). https:\/\/arxiv.org\/abs\/2507.09404."},{"key":"10.1016\/j.inffus.2026.104189_bib0067","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3597307","article-title":"Biases in large language models: origins, inventory, and discussion","volume":"15","author":"Navigli","year":"2023","journal-title":"ACM J. Data Inf. Qual."},{"key":"10.1016\/j.inffus.2026.104189_sbref0068","series-title":"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING)","first-page":"15922","article-title":"The ethical question \u2013 use of indigenous corpora for large language models","author":"Wiechetek","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_sbref0069","series-title":"Proceedings of 22nd International Arab Conference on Information Technology (ACIT)","first-page":"1","article-title":"Arabic language investigation in the context of unimodal and multimodal sentiment analysis","author":"Youcef","year":"2021"},{"issue":"C","key":"10.1016\/j.inffus.2026.104189_bib0070","article-title":"Multimodal arabic emotion recognition using deep learning","volume":"155","author":"Al Roken","year":"2023","journal-title":"Speech Commun."},{"issue":"C","key":"10.1016\/j.inffus.2026.104189_sbref0071","doi-asserted-by":"crossref","first-page":"377","DOI":"10.1016\/j.neucom.2021.02.020","article-title":"A novel context-aware multimodal framework for persian sentiment analysis","volume":"457","author":"Dashtipour","year":"2021","journal-title":"Neurocomputing"},{"key":"10.1016\/j.inffus.2026.104189_sbref0072","doi-asserted-by":"crossref","first-page":"136843","DOI":"10.1109\/ACCESS.2020.3011977","article-title":"Enhanced video analytics for sentiment analysis based on fusing textual, auditory and visual information","volume":"8","author":"Al-Azani","year":"2020","journal-title":"IEEE Access"},{"key":"10.1016\/j.inffus.2026.104189_sbref0073","series-title":"Proceedings of the Third Workshop on Speech and Language Technologies for Dravidian Languages (DravidianLangTech)","first-page":"72","article-title":"Findings of the shared task on multimodal abusive language detection and sentiment analysis in tamil and malayalam","author":"Premjith","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_sbref0074","series-title":"Proceedings of the Fifth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages (DravidianLangTech)","first-page":"80","article-title":"BytesizedLLM@dravidianlangtech 2025: abusive tamil and malayalam text targeting women on social media using XLM-RoBERTa and attention-BiLSTM","author":"Kodali","year":"2025"},{"key":"10.1016\/j.inffus.2026.104189_sbref0075","series-title":"Proceedings of the Fourth Workshop on Threat, Aggression & Cyberbullying (TRAC)","first-page":"85","article-title":"Detecting hate speech in amharic using multimodal analysis of social media memes","author":"Jigar","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_sbref0076","series-title":"Proceedings of International Conference on Information and Communication Technology for Development for Africa (ICT4DA)","first-page":"102","article-title":"Multimodal amharic hate speech detection using deep learning","author":"Debele","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_bib0077","series-title":"Proceedings of the Ninth Conference on Machine Translation (WMT)","first-page":"815","article-title":"English-to-low-resource translation: a multimodal approach for hindi, malayalam, bengali, and hausa","author":"Hatami","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_sbref0078","series-title":"Proceedings of 24th International Arab Conference on Information Technology (ACIT)","first-page":"1","article-title":"A novel deep learning multi-Modal sentiment analysis model for english and egyptian arabic dialects using audio and text","author":"Alalem","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_bib0079","series-title":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR)","first-page":"2583","article-title":"An efficient fusion mechanism for multimodal low-resource setting","author":"Chauhan","year":"2022"},{"issue":"4","key":"10.1016\/j.inffus.2026.104189_sbref0080","doi-asserted-by":"crossref","first-page":"799","DOI":"10.3390\/electronics14040799","article-title":"Sentimentformer: a transformer-based multimodal fusion framework for enhanced sentiment analysis of memes in under-resourced bangla language","volume":"14","author":"Faria","year":"2025","journal-title":"Electronics"},{"key":"10.1016\/j.inffus.2026.104189_sbref0081","series-title":"Proceedings of the International Conference on Speech and Language Technologies for Low-Resource Languages (SPELLL)","first-page":"293","article-title":"Multimodal hate speech detection from bengali memes and texts","author":"Karim","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_sbref0082","doi-asserted-by":"crossref","first-page":"9716","DOI":"10.1109\/ACCESS.2023.3240373","article-title":"Multimodal arabic rumors detection","volume":"11","author":"Albalawi","year":"2023","journal-title":"IEEE Access"},{"issue":"12","key":"10.1016\/j.inffus.2026.104189_sbref0083","doi-asserted-by":"crossref","first-page":"3714","DOI":"10.3390\/s24123714","article-title":"Multimodal sensing for depression risk detection: integrating audio, video, and text data","volume":"24","author":"Zhang","year":"2024","journal-title":"Sensors"},{"key":"10.1016\/j.inffus.2026.104189_sbref0084","first-page":"2796","article-title":"A lip-reading model for tagalog using multimodal deep learning approach","volume":"8","author":"Deocampo","year":"2024","journal-title":"Int. J. Comput. Sci. Res."},{"key":"10.1016\/j.inffus.2026.104189_sbref0085","doi-asserted-by":"crossref","first-page":"153072","DOI":"10.1109\/ACCESS.2021.3122025","article-title":"Urdu sentiment analysis via multimodal data mining based on deep learning algorithms","volume":"9","author":"Sehar","year":"2021","journal-title":"IEEE Access"},{"issue":"3","key":"10.1016\/j.inffus.2026.104189_sbref0086","first-page":"503","article-title":"Advanced multimodal emotion recognition for javanese language using deep learning","volume":"12","author":"Arifin","year":"2024","journal-title":"Indon. J. Electr. Eng. Inform."},{"issue":"3","key":"10.1016\/j.inffus.2026.104189_bib0087","doi-asserted-by":"crossref","first-page":"314","DOI":"10.1504\/IJMC.2020.107097","article-title":"Multimodal systems for speech recognition","volume":"18","author":"Mamyrbayev","year":"2020","journal-title":"Int. J. Mobile Commun."},{"key":"10.1016\/j.inffus.2026.104189_sbref0088","series-title":"Proceedings of 26th International Conference on Computer and Information Technology (ICCIT)","first-page":"1","article-title":"Explainable multimodal sentiment analysis on bengali memes","author":"Elahi","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_sbref0089","series-title":"Proceedings of the Fourth Workshop on Speech, Vision, and Language Technologies for Dravidian Languages (DravidianLangTech)","first-page":"212","article-title":"Binary_beasts@dravidianlangtech-EACL 2024: multimodal abusive language detection in tamil based on integrated approach of machine learning and deep learning techniques","author":"Rahman","year":"2024"},{"issue":"C","key":"10.1016\/j.inffus.2026.104189_sbref0090","article-title":"A multi-stage multimodal framework for sentiment analysis of assamese in low resource setting","volume":"204","author":"Das","year":"2022","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.inffus.2026.104189_sbref0091","series-title":"Proceedings of the 2nd Workshop on Technologies for MT of Low Resource Languages (LoResMT)","first-page":"56","article-title":"Multilingual multimodal machine translation for dravidian languages utilizing phonetic transcription","author":"Chakravarthi","year":"2019"},{"key":"10.1016\/j.inffus.2026.104189_sbref0092","doi-asserted-by":"crossref","first-page":"13137","DOI":"10.1007\/s11042-023-15721-2","article-title":"Exploiting multiple correlated modalities can enhance low-resource machine translation quality","volume":"83","author":"Meetei","year":"2024","journal-title":"Multimed. Tools Appl."},{"key":"10.1016\/j.inffus.2026.104189_bib0093","series-title":"Proceedings of the 31st ACM International Conference on Multimedia (ACMMM)","first-page":"9467","article-title":"Cascaded cross-modal transformer for request and complaint detection","author":"Ristea","year":"2023"},{"issue":"2","key":"10.1016\/j.inffus.2026.104189_sbref0094","first-page":"21","article-title":"Multi-modal deep learning approach to improve sentence level sinhala sign language recognition","volume":"16","author":"Haputhanthri","year":"2023","journal-title":"Int. J. Adv. ICT Emerging Reg."},{"key":"10.1016\/j.inffus.2026.104189_sbref0095","series-title":"Proceedings of International Conference on Asian Language Processing (IALP)","first-page":"291","article-title":"Multi-modal sentiment analysis of mongolian language based on pre-trained models and high-resolution networks","author":"Yang","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_sbref0096","series-title":"Proceedings of the 7th Workshop on Asian Translation (WAT)","first-page":"109","article-title":"Multimodal neural machine translation for english to hindi","author":"Laskar","year":"2020"},{"key":"10.1016\/j.inffus.2026.104189_sbref0097","series-title":"Proceedings of the 8th Workshop on Asian Translation (WAT)","first-page":"155","article-title":"Improved english to hindi multimodal neural machine translation","author":"Laskar","year":"2021"},{"key":"10.1016\/j.inffus.2026.104189_bib0098","unstructured":"B. Gain, D. Bandyopadhyay, S. Mukherjee, C. Adak, A. Ekbal, Impact of visual context on noisy multimodal NMT: an empirical study for english to Indian languages, (2023). https:\/\/arxiv.org\/abs\/2308.16075."},{"issue":"1","key":"10.1016\/j.inffus.2026.104189_sbref0099","article-title":"Adding visual information to improve multimodal machine translation for low-resource language","volume":"2022","author":"Shi","year":"2022","journal-title":"Math. Prob. Eng."},{"key":"10.1016\/j.inffus.2026.104189_sbref0100","article-title":"Do cues in a video help in handling rare words in a machine translation system under a low-resource setting?","volume":"3","author":"Meetei","year":"2023","journal-title":"Natur. Lang. Process. J."},{"key":"10.1016\/j.inffus.2026.104189_sbref0101","series-title":"Proceedings of International Conference on Machine Learning and Data Engineering (ICMLDE)","first-page":"2102","article-title":"Hindi to english multimodal machine translation on news dataset in low resource setting","volume":"218","author":"Meetei","year":"2023"},{"issue":"4","key":"10.1016\/j.inffus.2026.104189_sbref0102","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3652161","article-title":"Unsupervised multimodal machine translation for low-resource distant language pairs","volume":"23","author":"Tayir","year":"2024","journal-title":"ACM Trans. Asian Low-Resour. Lang. Inf. Process."},{"key":"10.1016\/j.inffus.2026.104189_sbref0103","series-title":"Proceedings of Chinese Conference on Pattern Recognition and Computer Vision (PRCV)","first-page":"260","article-title":"Low-resource machine translation with different granularity image features","author":"Tayir","year":"2025"},{"key":"10.1016\/j.inffus.2026.104189_sbref0104","series-title":"Proceedings of 6th International Conference on Intelligent Computing and Control Systems (ICICCS)","first-page":"1469","article-title":"English-malayalam vision aid with multi modal machine learning technologies","author":"Lekshmy","year":"2022"},{"issue":"4","key":"10.1016\/j.inffus.2026.104189_sbref0105","doi-asserted-by":"crossref","first-page":"1499","DOI":"10.13053\/cys-23-4-3294","article-title":"Hindi visual genome: a dataset for multi-modal english to hindi machine translation","volume":"23","author":"Parida","year":"2019","journal-title":"Computaci\u00f3n y Sistemas"},{"key":"10.1016\/j.inffus.2026.104189_sbref0106","series-title":"Proceedings of the First Workshop on Multimodal Machine Translation for Low Resource Languages (MMTLRL)","first-page":"31","article-title":"Multimodal neural machine translation system for english to bengali","author":"Parida","year":"2021"},{"key":"10.1016\/j.inffus.2026.104189_bib0107","unstructured":"L. Nortje, D. Onea\u0163\u0103, G. Pirlogeanu, H. Kamper, Improved visually prompted keyword localisation in real low-resource settings, (2024). https:\/\/arxiv.org\/abs\/2409.06013."},{"key":"10.1016\/j.inffus.2026.104189_sbref0108","series-title":"Findings of the Association for Computational Linguistics: Empirical Methods in Natural Language Processing (EMNLP)","first-page":"3449","article-title":"MURAL: multimodal, multitask representations across languages","author":"Jain","year":"2021"},{"key":"10.1016\/j.inffus.2026.104189_sbref0109","series-title":"Proceedings of 2022 International Joint Conference on Neural Networks (IJCNN)","first-page":"1","article-title":"Multimodal neural machine translation for mongolian to chinese","author":"Jian","year":"2022"},{"issue":"24","key":"10.1016\/j.inffus.2026.104189_sbref0110","doi-asserted-by":"crossref","first-page":"14691","DOI":"10.1007\/s00521-024-09818-4","article-title":"Multimodal attention-driven visual question answering for malayalam","volume":"36","author":"Kovath","year":"2024","journal-title":"Neural Comput. Appl."},{"key":"10.1016\/j.inffus.2026.104189_sbref0111","series-title":"Proceedings of the 9th Workshop on Asian Translation (WAT)","first-page":"117","article-title":"Investigation of english to hindi multimodal neural machine translation using transliteration-based phrase pairs augmentation","author":"Laskar","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_sbref0112","series-title":"Proceedings of International Conference on Computational Performance Evaluation (ComPE)","first-page":"387","article-title":"Multimodal neural machine translation for english\u2013assamese pair","author":"Laskar","year":"2021"},{"key":"10.1016\/j.inffus.2026.104189_sbref0113","series-title":"Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","first-page":"5110","article-title":"A simple multi-modality transfer learning baseline for sign language translation","author":"Chen","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_bib0114","unstructured":"A. Amalas, M. Ghogho, M. Chetouani, R.O.H. Thami, A multilingual training strategy for low resource Text to Speech, (2024). https:\/\/arxiv.org\/abs\/2409.01217."},{"issue":"3","key":"10.1016\/j.inffus.2026.104189_bib0115","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3492325","article-title":"When pairs meet triplets: improving low-resource captioning via multi-objective optimization","volume":"18","author":"Wu","year":"2022","journal-title":"ACM Trans. Multimedia Comput. Commun. Appl."},{"key":"10.1016\/j.inffus.2026.104189_sbref0116","series-title":"Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"10471","article-title":"Visual speech recognition for languages with limited labeled data using automatic labels from whisper","author":"Yeo","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_sbref0117","series-title":"Proceedings of the Second Arabic Natural Language Processing Conference (ArabicNLP)","first-page":"210","article-title":"Qalam: a multimodal LLM for arabic optical character and handwriting recognition","author":"Bhatia","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_bib0118","unstructured":"C. Tran, H.L. Thanh, LaVy: Vietnamese multimodal large language model, (2024). https:\/\/arxiv.org\/abs\/2404.07922."},{"key":"10.1016\/j.inffus.2026.104189_sbref0119","doi-asserted-by":"crossref","first-page":"31","DOI":"10.4314\/ujah.v25i1.2","article-title":"An analysis of minimal pairs in igbo using a multimodal approach to speech perception","volume":"25","author":"Onuoha","year":"2024","journal-title":"Unizik J. Arts Human."},{"key":"10.1016\/j.inffus.2026.104189_sbref0120","series-title":"Proceedings of IEEE International Conference on Multimedia and Expo (ICME)","first-page":"362","article-title":"Improving captioning for low-resource languages by cycle consistency","author":"Wu","year":"2019"},{"issue":"5","key":"10.1016\/j.inffus.2026.104189_sbref0121","doi-asserted-by":"crossref","first-page":"104","DOI":"10.1145\/3514498","article-title":"Exploring multi-lingual, multi-task, and adversarial learning for low-resource sentiment analysis","volume":"21","author":"Mamta","year":"2022","journal-title":"ACM Trans. Asian Low-Resour. Lang. Inf. Process."},{"key":"10.1016\/j.inffus.2026.104189_sbref0122","series-title":"Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (ACL)","first-page":"2763","article-title":"A good prompt is worth millions of parameters: low-resource prompt-based learning for vision-language models","author":"Jin","year":"2022"},{"issue":"1","key":"10.1016\/j.inffus.2026.104189_sbref0123","article-title":"Amharic language image captions generation using hybridized attention-based deep neural networks","volume":"2023","author":"Solomon","year":"2023","journal-title":"Appl. Computat. Intell. Soft Comput."},{"key":"10.1016\/j.inffus.2026.104189_sbref0124","doi-asserted-by":"crossref","DOI":"10.1016\/j.bspc.2023.105051","article-title":"Morphology & word sense disambiguation embedded multimodal neural machine translation system between sanskrit and malayalam","volume":"85","author":"Rahul","year":"2023","journal-title":"Biomed. Signal Process. Control"},{"key":"10.1016\/j.inffus.2026.104189_sbref0125","series-title":"Findings of the Association for Computational Linguistics (ACL)","first-page":"6368","article-title":"XtremeCLIP: extremely parameter-efficient tuning for low-resource vision language understanding","author":"Tang","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_bib0126","unstructured":"A. Asgarov, S. Rustamov, LowCLIP: adapting the CLIP model architecture for low-resource languages in multimodal image retrieval task, (2024). 10.48550\/arXiv.2408.13909."},{"issue":"1","key":"10.1016\/j.inffus.2026.104189_sbref0127","first-page":"28","article-title":"Speech recognition model in yoruba language","volume":"1","author":"Rahmon","year":"2024","journal-title":"Smartify J. Smart Educ. Pedag."},{"key":"10.1016\/j.inffus.2026.104189_bib0128","unstructured":"A. Dubey, A. Jauhri, A. Pandey, A. Kadian, A. Al-Dahle, A. Letman, A. Mathur, A. Schelten, A. Yang, A. Fan, et al., The Llama 3 herd of models, (2024). 10.48550\/ARXIV.2407.21783."},{"key":"10.1016\/j.inffus.2026.104189_bib0129","unstructured":"DeepSeek-AI, A. Liu, B. Feng, B. Xue, B. Wang, B. Wu, C. Lu, C. Zhao, C. Deng, C. Zhang, et al., DeepSeek-V3 Technical Report, (2024). 10.48550\/ARXIV.2412.19437."},{"key":"10.1016\/j.inffus.2026.104189_bib0130","unstructured":"Anthropic, Claude Opus 4 & Claude Sonnet 4 System Card, 2025. Accessed: December 2025, https:\/\/www-cdn.anthropic.com\/07b2a3f9902ee19fe39a36ca638e5ae987bc64dd.pdf."},{"key":"10.1016\/j.inffus.2026.104189_bib0131","unstructured":"T. Gunter, Z. Wang, C. Wang, R. Pang, A. Narayanan, A. Zhang, B. Zhang, C. Chen, C. Chiu, D. Qiu, et al., Apple intelligence foundation language models, (2024). 10.48550\/ARXIV.2407.21075."},{"key":"10.1016\/j.inffus.2026.104189_bib0132","unstructured":"L. Yang, Y. Tian, B. Li, X. Zhang, K. Shen, Y. Tong, M. Wang, MMaDA: multimodal large diffusion language models, (2025). 10.48550\/ARXIV.2505.15809."},{"key":"10.1016\/j.inffus.2026.104189_sbref0133","series-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (ACL)","first-page":"637","article-title":"Multimodal instruction tuning with conditional mixture of LoRA","author":"Shen","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_sbref0134","doi-asserted-by":"crossref","DOI":"10.7717\/peerj-cs.1964","article-title":"Adapting multilingual vision language transformers for low-resource urdu optical character recognition (OCR)","volume":"10","author":"Cheema","year":"2024","journal-title":"PeerJ Comput. Sci."},{"key":"10.1016\/j.inffus.2026.104189_sbref0135","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV)","first-page":"15359","article-title":"Lip reading for low-resource languages by learning and combining general speech knowledge and language-specific knowledge","author":"Kim","year":"2023"},{"issue":"C","key":"10.1016\/j.inffus.2026.104189_sbref0136","article-title":"Sentiment analysis on a low-resource language dataset using multimodal representation learning and cross-lingual transfer learning","volume":"157","author":"Aruna Gladys","year":"2024","journal-title":"Appl. Soft Comput."},{"key":"10.1016\/j.inffus.2026.104189_sbref0137","series-title":"Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","first-page":"1","article-title":"Improving massively multilingual ASR with auxiliary CTC objectives","author":"Chen","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_sbref0138","series-title":"Proceedings of the 3rd Workshop on Multi-lingual Representation Learning (MRL)","first-page":"184","article-title":"CAPIVARA: cost-efficient approach for improving multilingual CLIP performance on low-resource languages","author":"dos Santos","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_sbref0139","doi-asserted-by":"crossref","first-page":"2544","DOI":"10.1109\/TASLP.2024.3393772","article-title":"Visually grounded few-shot word learning in low-resource settings","volume":"32","author":"Nortje","year":"2024","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"10.1016\/j.inffus.2026.104189_sbref0140","series-title":"Proceedings of the 38th International Conference on Machine Learning (ICML)","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","volume":"139","author":"Radford","year":"2021"},{"key":"10.1016\/j.inffus.2026.104189_sbref0141","series-title":"Proceedings of the 35th International Conference on Neural Information Processing Systems (NeurIPS)","first-page":"200","article-title":"Multimodal few-shot learning with frozen language models","author":"Tsimpoukelli","year":"2021"},{"key":"10.1016\/j.inffus.2026.104189_sbref0142","series-title":"Proceedings of the AAAI Conference on Artificial Intelligence (AAAI)","first-page":"3081","article-title":"An empirical study of GPT-3 for few-shot knowledge-based VQA","volume":"36","author":"Yang","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_sbref0143","series-title":"Proceedings of International Conference on Machine Learning and Data Engineering (ICMLDE)","first-page":"979","article-title":"English-assamese multimodal neural machine translation using transliteration-based phrase augmentation approach","volume":"218","author":"Laskar","year":"2023"},{"key":"10.1016\/j.inffus.2026.104189_sbref0144","series-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (ACL)","first-page":"12753","article-title":"Peacock: a family of arabic multimodal large language models and benchmarks","author":"Alwajih","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_bib0145","doi-asserted-by":"crossref","unstructured":"C. Wang, H. Tang, X. Yang, Y. Xie, J. Suh, S. Sitaram, J. Huang, Y. Xie, Z. Gong, X. Xie, F. Wu, Uncovering inequalities in new knowledge learning by large language models across different languages, (2025). https:\/\/arxiv.org\/abs\/2503.04064.","DOI":"10.1073\/pnas.2514626122"},{"key":"10.1016\/j.inffus.2026.104189_sbref0146","series-title":"Findings of the Association for Computational Linguistics (NAACL)","first-page":"157","article-title":"FedNLP: benchmarking federated learning methods for natural language processing tasks","author":"Lin","year":"2022"},{"key":"10.1016\/j.inffus.2026.104189_bib0147","unstructured":"W. Zhao, Y. Chen, R. Lee, X. Qiu, Y. Gao, H. Fan, N.D. Lane, Breaking physical and linguistic borders: multilingual federated prompt tuning for low-resource languages, (2025). https:\/\/arxiv.org\/abs\/2507.03003. 10.48550\/arXiv.2507.03003."},{"key":"10.1016\/j.inffus.2026.104189_bib0148","unstructured":"L. Tran, W. Sun, S. Patterson, A. Milanova, Privacy-preserving personalized federated prompt learning for multimodal large language models, (2025). 10.48550\/arXiv.2501.13904."},{"key":"10.1016\/j.inffus.2026.104189_bib0149","unstructured":"M. Andersland, Amharic LLaMA and LLaVA: multimodal LLMs for low resource languages, (2024). 10.48550\/arXiv.2403.06354."},{"key":"10.1016\/j.inffus.2026.104189_sbref0150","series-title":"Findings of the Association for Computational Linguistics (ACL)","first-page":"2668","article-title":"The language barrier: dissecting safety challenges of LLMs in multilingual contexts","author":"Shen","year":"2024"},{"key":"10.1016\/j.inffus.2026.104189_sbref0151","series-title":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (ACL)","first-page":"18761","article-title":"Global MMLU: understanding and addressing cultural and linguistic biases in multilingual evaluation","author":"Singh","year":"2025"}],"container-title":["Information Fusion"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1566253526000680?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1566253526000680?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,3]],"date-time":"2026-03-03T16:30:02Z","timestamp":1772555402000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1566253526000680"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":151,"alternative-id":["S1566253526000680"],"URL":"https:\/\/doi.org\/10.1016\/j.inffus.2026.104189","relation":{},"ISSN":["1566-2535"],"issn-type":[{"value":"1566-2535","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Large multimodal models for low-resource languages: A survey","name":"articletitle","label":"Article Title"},{"value":"Information Fusion","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.inffus.2026.104189","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 The Authors. Published by Elsevier B.V.","name":"copyright","label":"Copyright"}],"article-number":"104189"}}