{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T07:06:31Z","timestamp":1775199991301,"version":"3.50.1"},"reference-count":57,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100006785","name":"Google","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100006785","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,6]]},"DOI":"10.1109\/asru65441.2025.11434707","type":"proceedings-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:48:04Z","timestamp":1775159284000},"page":"1-8","source":"Crossref","is-referenced-by-count":0,"title":["mSTEB: Massively Multilingual Evaluation of LLMs on Speech and Text Tasks"],"prefix":"10.1109","author":[{"given":"Luel Hagos","family":"Beyene","sequence":"first","affiliation":[{"name":"AIMS RIC"}]},{"given":"Vivek","family":"Verma","sequence":"additional","affiliation":[{"name":"Mila - Quebec AI Institute"}]},{"given":"Min","family":"Ma","sequence":"additional","affiliation":[{"name":"Google DeepMind"}]},{"given":"Jesujoba O.","family":"Alabi","sequence":"additional","affiliation":[{"name":"Saarland University"}]},{"given":"Fabian David","family":"Schmidt","sequence":"additional","affiliation":[{"name":"University of W\u00fcrzburg"}]},{"given":"Joyce","family":"Nakatumba-Nabende","sequence":"additional","affiliation":[{"name":"Makerere University"}]},{"given":"David 
Ifeoluwa","family":"Adelani","sequence":"additional","affiliation":[{"name":"Mila - Quebec AI Institute"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Gpt-4 technical report","author":"OpenAI","year":"2023"},{"key":"ref2","article-title":"Deepseek-r1: Incentivizing reasoning capability in LLMs via reinforcement learning","volume":"abs\/2501.12948","author":"Guo","year":"2025","journal-title":"ArXiv"},{"key":"ref3","article-title":"The llama 3 herd of models","volume":"abs\/2407.21783","author":"Dubey","year":"2024","journal-title":"ArXiv"},{"key":"ref4","article-title":"Qwen-audio: Advancing universal audio understanding via unified large-scale audio-language models","author":"Chu","year":"2023","journal-title":"arXiv preprint arXiv:2311.07919"},{"key":"ref5","article-title":"Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context","author":"Team","year":"2024","journal-title":"arXiv preprint arXiv:2403.05530"},{"key":"ref6","article-title":"Salmonn: Towards generic hearing abilities for large language models","volume-title":"The Twelfth International Conference on Learning Representations.","author":"Tang"},{"key":"ref7","article-title":"Visual instruction tuning","volume":"abs\/2304.08485","author":"Liu","year":"2023","journal-title":"ArXiv"},{"key":"ref8","article-title":"Pangea: A fully open multilingual multimodal LLM for 39 languages","volume":"abs\/2410.16153","author":"Yue","year":"2024","journal-title":"ArXiv"},{"key":"ref9","first-page":"226","article-title":"SIB-200: A simple, inclusive, and big evaluation dataset for topic classification in 200+ languages and 
dialects","volume-title":"EACL","author":"Adelani"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2023.wmt-1.40"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.naacl-long.143"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-acl.709"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.emnlp-main.570"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2248"},{"key":"ref15","first-page":"8440","article-title":"Unsupervised cross-lingual representation learning at scale","author":"Conneau","year":"2020","journal-title":"ACL"},{"key":"ref16","article-title":"Deberta: Decoding-enhanced bert with disentangled attention","volume-title":"International Conference on Learning Representations","author":"He"},{"key":"ref17","first-page":"483","article-title":"mT5: A massively multilingual pre-trained text-to-text transformer","author":"Xue","year":"2021","journal-title":"NAACL."},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-143"},{"key":"ref19","first-page":"10205","article-title":"Towards robust speech representation learning for thousands of languages","volume-title":"EMNLP.","author":"Chen"},{"key":"ref20","first-page":"30","article-title":"SpiRit-LM: Interleaved spoken and written language model","volume":"13","author":"Nguyen","year":"2025","journal-title":"TACL"},{"key":"ref21","article-title":"No language left behind: Scaling human-centered machine translation","author":"Team","year":"2022"},{"key":"ref22","first-page":"749","article-title":"The belebele benchmark: a parallel reading comprehension dataset in 122 language variants","volume":"2024","author":"Bandarkar","journal-title":"ACL."},{"key":"ref23","first-page":"798","article-title":"Fleurs: Few-shot learning evaluation of universal representations of speech","volume-title":"2022 IEEE Spoken Language Technology Workshop 
(SLT)","author":"Conneau"},{"key":"ref24","article-title":"Fleursslu: A massively multilingual benchmark for spoken language understanding","volume":"abs\/2501.06117","author":"Schmidt","year":"2025","journal-title":"ArXiv"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49660.2025.10890017"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/N18-1101"},{"key":"ref27","first-page":"2475","article-title":"XNLI: Evaluating cross-lingual sentence representations","volume-title":"EMNLP.","author":"Conneau"},{"key":"ref28","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2025.naacl-long.139","article-title":"Irokobench: A new benchmark for african languages in the age of large language models","author":"Adelani","year":"2025"},{"key":"ref29","first-page":"10994","article-title":"IndicXNLI: Evaluating multilingual inference for Indian languages","volume-title":"EMNLP","author":"Aggarwal"},{"key":"ref30","first-page":"6279","article-title":"AmericasNLI: Evaluating zero-shot natural language understanding of pretrained multilingual models in truly low-resource languages","volume-title":"ACL","author":"Ebrahimi"},{"key":"ref31","first-page":"2556","article-title":"Building a data infrastructure for a mid-resource language: The case of Catalan","volume-title":"LREC-COLING 2024.","author":"Gonzalez-Agirre"},{"key":"ref32","first-page":"5155","article-title":"SEACrowd: A multilingual multimodal data hub and benchmark suite for Southeast Asian languages","volume-title":"EMNLP","author":"Lovenia"},{"key":"ref33","first-page":"10511","article-title":"IndoNLI: A natural language inference dataset for Indonesian","volume-title":"EMNLP.","author":"Mahendra"},{"key":"ref34","first-page":"2957","article-title":"JGLUE: Japanese general language understanding evaluation","volume-title":"LREC","author":"Kurihara"},{"key":"ref35","first-page":"9050","article-title":"InferBR: A natural language inference dataset in Portuguese","volume-title":"LREC-COLING 
2024","author":"Bencke"},{"key":"ref36","first-page":"1674","article-title":"Evaluation of sentence representations in Polish","volume-title":"LREC","author":"Dadas"},{"key":"ref37","article-title":"Jampatoisnli: A jamaican patois natural language inference dataset","volume":"abs\/2212.03419","author":"Armstrong","year":"2022","journal-title":"CoRR"},{"key":"ref38","article-title":"Klue: Korean language understanding evaluation","author":"Park","year":"2021"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.acl-long.15"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00474"},{"issue":"97","key":"ref41","first-page":"1","article-title":"Scaling speech technology to 1,000+ languages","volume":"25","author":"Pratap","year":"2024","journal-title":"Journal of Machine Learning Research"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1038\/s41586-024-08359-z"},{"key":"ref43","first-page":"6155","article-title":"GlotLID: Language identification for low-resource languages","volume-title":"Findings of ACL: EMNLP 2023.","author":"Kargaran"},{"key":"ref44","first-page":"13142","article-title":"XLM-V: Overcoming the vocabulary bottleneck in multilingual masked language models","volume-title":"EMNLP","author":"Liang"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2024.findings-emnlp.394"},{"key":"ref46","article-title":"The llama 3 herd of models","author":"Grattafiori","year":"2024","journal-title":"arXiv preprint arXiv:2407.21783"},{"key":"ref47","article-title":"LLM2vec: Large language models are secretly powerful text encoders","volume-title":"First Conference on Language Modeling","author":"BehnamGhader"},{"key":"ref48","article-title":"DeBERTav3: Improving deBERTa using ELECTRA-style pre-training with gradient-disentangled embedding sharing","volume-title":"The Eleventh International Conference on Learning Representations","author":"He"},{"key":"ref49","first-page":"1112","article-title":"A 
broad-coverage challenge corpus for sentence understanding through inference","volume-title":"NAACL","author":"Williams"},{"key":"ref50","article-title":"Gemma 3 technical report","volume-title":"arXiv preprint arXiv:2503.19786","author":"Team","year":"2025"},{"key":"ref51","doi-asserted-by":"crossref","DOI":"10.18653\/v1\/2025.findings-acl.976","article-title":"Afrobench: How good are large language models on african languages?","author":"Ojo","year":"2025"},{"key":"ref52","article-title":"Few-shot learning with multilingual language models","volume":"abs\/2112.10668","author":"Lin","year":"2021","journal-title":"CoRR"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref54","first-page":"612","article-title":"chrF++: words helping character n-grams","author":"Popovi\u0107","year":"2017","journal-title":"WMT"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.52202\/075280-2940"},{"key":"ref56","first-page":"47","article-title":"Are LLMs breaking MT metrics? 
results of the WMT24 metrics shared task","volume-title":"WMT.","author":"Freitag"},{"key":"ref57","first-page":"6282","article-title":"The state and fate of linguistic diversity and inclusion in the NLP world","author":"Joshi","year":"2020","journal-title":"ACL"}],"event":{"name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,12,6]]},"end":{"date-parts":[[2025,12,10]]}},"container-title":["2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11434577\/11433836\/11434707.pdf?arnumber=11434707","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T04:58:39Z","timestamp":1775192319000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11434707\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":57,"URL":"https:\/\/doi.org\/10.1109\/asru65441.2025.11434707","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]}}}