{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T03:45:51Z","timestamp":1773891951809,"version":"3.50.1"},"reference-count":58,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,7,1]],"date-time":"2026-07-01T00:00:00Z","timestamp":1782864000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T00:00:00Z","timestamp":1764201600000},"content-version":"vor","delay-in-days":0,"URL":"http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/"}],"funder":[{"DOI":"10.13039\/501100003086","name":"Government of the Basque Country","doi-asserted-by":"publisher","award":["KK-2024\/00050"],"award-info":[{"award-number":["KK-2024\/00050"]}],"id":[{"id":"10.13039\/501100003086","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Computer Speech &amp; Language"],"published-print":{"date-parts":[[2026,7]]},"DOI":"10.1016\/j.csl.2025.101905","type":"journal-article","created":{"date-parts":[[2025,11,27]],"date-time":"2025-11-27T16:43:27Z","timestamp":1764261807000},"page":"101905","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Do modern speech LLMs and re-scoring techniques improve bilingual ASR performance for Basque and Spanish in domain-specific contexts?"],"prefix":"10.1016","volume":"99","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3585-6346","authenticated-orcid":false,"given":"Ander","family":"Gonz\u00e1lez-Docasal","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4946-9232","authenticated-orcid":false,"given":"Juan Camilo","family":"V\u00e1squez-Correa","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0731-1317","authenticated-orcid":false,"given":"Haritz","family":"Arzelus","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7938-4486","authenticated-orcid":false,"given":"Aitor","family":"\u00c1lvarez","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7300-8562","authenticated-orcid":false,"given":"Santiago A.","family":"Moreno-Acevedo","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.csl.2025.101905_b1","series-title":"Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing","first-page":"4895","article-title":"GQA: Training generalized multi-query transformer models from multi-head checkpoints","author":"Ainslie","year":"2023"},{"key":"10.1016\/j.csl.2025.101905_b2","series-title":"Common voice: A massively-multilingual speech corpus","author":"Ardila","year":"2019"},{"key":"10.1016\/j.csl.2025.101905_b3","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.csl.2025.101905_b4","series-title":"Neural machine translation by jointly learning to align and translate","author":"Bahdanau","year":"2014"},{"key":"10.1016\/j.csl.2025.101905_b5","doi-asserted-by":"crossref","unstructured":"Campione, E., V\u00e9ronis, J., 1998. A multilingual prosodic database. In: Fifth International Conference on Spoken Language Processing.","DOI":"10.21437\/ICSLP.1998-609"},{"key":"10.1016\/j.csl.2025.101905_b6","unstructured":"Casacuberta, F., Garcia, R., Llisterri, J., Nadeu, C., Pardo, J., Rubio, A., 1991. Development of Spanish corpora for speech research (Albayzin). In: Workshop on International Cooperation and Standardization of Speech Databases and Speech I\/O Assesment Methods, Chiavari, Italy. pp. 26\u201328."},{"issue":"4","key":"10.1016\/j.csl.2025.101905_b7","doi-asserted-by":"crossref","first-page":"359","DOI":"10.1006\/csla.1999.0128","article-title":"An empirical study of smoothing techniques for language modeling","volume":"13","author":"Chen","year":"1999","journal-title":"Comput. Speech Lang."},{"key":"10.1016\/j.csl.2025.101905_b8","series-title":"Generative error correction for code-switching speech recognition using large language models","author":"Chen","year":"2023"},{"key":"10.1016\/j.csl.2025.101905_b9","unstructured":"del Pozo, A., Aliprandi, C., \u00c1lvarez, A., Mendes, C., Neto, J.P., Paulo, S., Piccinini, N., Raffaelli, M., 2014. SAVAS: Collecting, Annotating and Sharing Audiovisual Language Resources for Automatic Subtitling. In: LREC. pp. 432\u2013436."},{"key":"10.1016\/j.csl.2025.101905_b10","series-title":"The LLaMA 3 herd of models","author":"Dubey","year":"2024"},{"key":"10.1016\/j.csl.2025.101905_b11","doi-asserted-by":"crossref","unstructured":"Etchegoyhen, T., Arzelus, H., Ugarte, H.G., Alvarez, A., Gonz\u00e1lez-Docasal, A., Fernandez, E.B., 2021. mintzai-ST: Corpus and Baselines for Basque-Spanish Speech Translation. In: IberSPEECH.","DOI":"10.21437\/IberSPEECH.2021-41"},{"key":"10.1016\/j.csl.2025.101905_b12","series-title":"Proceedings of ICASSP","first-page":"7049","article-title":"RNN-Transducer with stateless prediction network","author":"Ghodsi","year":"2020"},{"issue":"2","key":"10.1016\/j.csl.2025.101905_b13","doi-asserted-by":"crossref","DOI":"10.1121\/10.0024876","article-title":"Evaluating OpenAI\u2019s Whisper ASR: Performance analysis across diverse accents and speaker traits","volume":"4","author":"Graham","year":"2024","journal-title":"JASA Express Lett."},{"key":"10.1016\/j.csl.2025.101905_b14","series-title":"Sequence transduction with recurrent neural networks","author":"Graves","year":"2012"},{"key":"10.1016\/j.csl.2025.101905_b15","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J., 2006. Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of ICML. pp. 369\u2013376.","DOI":"10.1145\/1143844.1143891"},{"key":"10.1016\/j.csl.2025.101905_b16","series-title":"First-pass large vocabulary continuous speech recognition using bi-directional recurrent DNNs","author":"Hannun","year":"2014"},{"key":"10.1016\/j.csl.2025.101905_b17","series-title":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","first-page":"3828","article-title":"OlympiadBench: A challenging benchmark for promoting AGI with olympiad-level bilingual multimodal scientific problems","author":"He","year":"2024"},{"issue":"6","key":"10.1016\/j.csl.2025.101905_b18","doi-asserted-by":"crossref","first-page":"82","DOI":"10.1109\/MSP.2012.2205597","article-title":"Deep neural networks for acoustic modeling in speech recognition: The shared views of four research groups","volume":"29","author":"Hinton","year":"2012","journal-title":"IEEE Signal Process. Mag."},{"key":"10.1016\/j.csl.2025.101905_b19","series-title":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"6533","article-title":"HuBERT: How much can a bad teacher benefit ASR pre-training?","author":"Hsu","year":"2021"},{"key":"10.1016\/j.csl.2025.101905_b20","doi-asserted-by":"crossref","unstructured":"Hu, Y., Li, T., Lu, Q., Shao, W., He, J., Qiao, Y., Luo, P., 2024. OmniMedVQA: A New Large-Scale Comprehensive Evaluation Benchmark for Medical LVLM. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition. CVPR, pp. 22170\u201322183.","DOI":"10.1109\/CVPR52733.2024.02093"},{"key":"10.1016\/j.csl.2025.101905_b21","series-title":"Lora: Low-rank adaptation of large language models","author":"Hu","year":"2021"},{"key":"10.1016\/j.csl.2025.101905_b22","series-title":"SwissText\/KONVENS","article-title":"UZH TILT: A kaldi recipe for Swiss German Speech to standard german text","author":"Kew","year":"2020"},{"key":"10.1016\/j.csl.2025.101905_b23","series-title":"Proceedings of INTERSPEECH","first-page":"2068","article-title":"Pruned RNN-T for fast, memory-efficient ASR training","author":"Kuang","year":"2022"},{"key":"10.1016\/j.csl.2025.101905_b24","series-title":"IberSPEECH 2024","first-page":"289","article-title":"Speech technologies in the ILENIA project: Generating resources to develop voice applications in the official languages of Spain","author":"K\u00fclebi","year":"2024"},{"key":"10.1016\/j.csl.2025.101905_b25","series-title":"2024 IEEE Spoken Language Technology Workshop","first-page":"431","article-title":"Crossmodal ASR error correction with discrete speech units","author":"Li","year":"2024"},{"key":"10.1016\/j.csl.2025.101905_b26","doi-asserted-by":"crossref","unstructured":"Li, S., Chen, C., Kwok, C.Y., Chu, C., Chng, E.S., Kawai, H., 2024b. Investigating asr error correction with large language model and multilingual 1-best hypotheses. In: Proc. Interspeech. pp. 1315\u20131319.","DOI":"10.21437\/Interspeech.2024-368"},{"key":"10.1016\/j.csl.2025.101905_b27","series-title":"ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"1","article-title":"Revise, reason, and recognize: Llm-based emotion recognition via emotion-specific prompts and asr error correction","author":"Li","year":"2025"},{"key":"10.1016\/j.csl.2025.101905_b28","series-title":"International Symposium on Neural Networks","first-page":"133","article-title":"Audio-LLM: Activating the capabilities of large language models to comprehend audio data","author":"Li","year":"2024"},{"issue":"1","key":"10.1016\/j.csl.2025.101905_b29","doi-asserted-by":"crossref","DOI":"10.1561\/116.00000050","article-title":"Recent advances in end-to-end automatic speech recognition","volume":"11","author":"Li","year":"2022","journal-title":"APSIPA Trans. Signal Inf. Process."},{"issue":"24","key":"10.1016\/j.csl.2025.101905_b30","doi-asserted-by":"crossref","first-page":"5412","DOI":"10.3390\/app9245412","article-title":"Albayzin 2018 evaluation: the iberspeech-RTVE challenge on speech technologies for spanish broadcast media","volume":"9","author":"Lleida","year":"2019","journal-title":"Appl. Sci."},{"key":"10.1016\/j.csl.2025.101905_b31","unstructured":"Lleida, E., Ortega, A., Miguel, A., Baz\u00e1n-Gil, V., P\u00e9rez, C., Zotano, M., De Prada, A., 2024. RTVE 2024 Databases Description. Tech. Rep., Retrived from http:\/\/catedrartve.unizar.es\/rtvedatabase.html."},{"key":"10.1016\/j.csl.2025.101905_b32","doi-asserted-by":"crossref","DOI":"10.1109\/TASLPRO.2025.3551083","article-title":"ASR error correction using large language models","author":"Ma","year":"2025","journal-title":"IEEE Trans. Audio, Speech Lang. Process."},{"key":"10.1016\/j.csl.2025.101905_b33","first-page":"83","article-title":"Codeswitching, translanguaging and bilingual grammar","author":"MacSwan","year":"2022","journal-title":"Multiling. Perspect. Translanguaging"},{"key":"10.1016\/j.csl.2025.101905_b34","series-title":"Phi-4-mini technical report: Compact yet powerful multimodal language models via mixture-of-LoRAs","author":"Microsoft","year":"2025"},{"key":"10.1016\/j.csl.2025.101905_b35","doi-asserted-by":"crossref","DOI":"10.1109\/LSP.2025.3540953","article-title":"MLSS: Mandarin english code-switching speech recognition via mutual learning-based semi-supervised method","author":"Nga","year":"2025","journal-title":"IEEE Signal Process. Lett."},{"key":"10.1016\/j.csl.2025.101905_b36","series-title":"2012 8th International Symposium on Chinese Spoken Language Processing","first-page":"301","article-title":"Investigation of deep neural networks (DNN) for large vocabulary continuous speech recognition: Why DNN surpasses GMMs in acoustic modeling","author":"Pan","year":"2012"},{"key":"10.1016\/j.csl.2025.101905_b37","series-title":"Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics","first-page":"311","article-title":"Bleu: a method for automatic evaluation of machine translation","author":"Papineni","year":"2002"},{"key":"10.1016\/j.csl.2025.101905_b38","series-title":"Specaugment: A simple data augmentation method for automatic speech recognition","author":"Park","year":"2019"},{"key":"10.1016\/j.csl.2025.101905_b39","doi-asserted-by":"crossref","unstructured":"Penagarikano, M., Varona, A., Bordel, G., Rodriguez-Fuentes, L.J., Sarriena, B., 2024. Albayzin 2024 Bilingual Basque-Spanish Speech to Text (BBS-S2T) Challenge: Datasets, Systems and Results. In: Proc. IberSPEECH 2024. pp. 319\u2013324.","DOI":"10.21437\/IberSPEECH.2024-67"},{"key":"10.1016\/j.csl.2025.101905_b40","series-title":"Proceedings of the Tenth Workshop on Statistical Machine Translation","first-page":"392","article-title":"chrF: character n-gram F-score for automatic MT evaluation","author":"Popovi\u0107","year":"2015"},{"key":"10.1016\/j.csl.2025.101905_b41","unstructured":"Povey, D., Zelasko, P., Khudanpur, S., 2021. Speech recognition with next-generation kaldi (k2, lhotse, icefall). In: INTERSPEECH: Tutorials."},{"issue":"97","key":"10.1016\/j.csl.2025.101905_b42","first-page":"1","article-title":"Scaling speech technology to 1000+ languages","volume":"25","author":"Pratap","year":"2024","journal-title":"J. Mach. Learn. Res."},{"key":"10.1016\/j.csl.2025.101905_b43","series-title":"Proceedings of ICML","article-title":"Robust speech recognition via large-scale weak supervision","author":"Radford","year":"2023"},{"key":"10.1016\/j.csl.2025.101905_b44","series-title":"Proceedings of ASRU","first-page":"1","article-title":"Fast conformer with linearly scalable attention for efficient speech recognition","author":"Rekesh","year":"2023"},{"key":"10.1016\/j.csl.2025.101905_b45","series-title":"Proceddings of ASRU","first-page":"1","article-title":"Fast conformer with linearly scalable attention for efficient speech recognition","author":"Rekesh","year":"2023"},{"key":"10.1016\/j.csl.2025.101905_b46","series-title":"Neural machine translation of rare words with subword units","author":"Sennrich","year":"2015"},{"key":"10.1016\/j.csl.2025.101905_b47","series-title":"Open Automatic Speech Recognition Leaderboard","author":"Srivastav","year":"2024"},{"key":"10.1016\/j.csl.2025.101905_b48","series-title":"Proceedings of the 19th Australasian Document Computing Symposium","first-page":"58","article-title":"Improvements to BM25 and language models examined","author":"Trotman","year":"2014"},{"issue":"5","key":"10.1016\/j.csl.2025.101905_b49","doi-asserted-by":"crossref","DOI":"10.3390\/app14051951","article-title":"A Bilingual Basque\u2013Spanish dataset of parliamentary sessions for the development and evaluation of speech technology","volume":"14","author":"Varona","year":"2024","journal-title":"Appl. Sci."},{"key":"10.1016\/j.csl.2025.101905_b50","doi-asserted-by":"crossref","unstructured":"V\u00e1squez-Correa, J.C., Alvarez, A., Arzelus, H., Moreno-Acevedo, S.A., Gonz\u00e1lez-Docasal, A., Mart\u00edn-Do\u00f1as, J.M., 2024. The Vicomtech Speech Transcription Systems for the Albayz\u00edn 2024 Bilingual Basque-Spanish Speech to Text (BBS-S2T) Challenge. In: Proceedings of IberSPEECH. pp. 11\u201313.","DOI":"10.21437\/IberSPEECH.2024-64"},{"key":"10.1016\/j.csl.2025.101905_b51","series-title":"International Conference on Text, Speech, and Dialogue","first-page":"226","article-title":"When whisper meets TTS: Domain adaptation using only synthetic speech data","author":"V\u00e1squez-Correa","year":"2023"},{"key":"10.1016\/j.csl.2025.101905_b52","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.csl.2025.101905_b53","series-title":"Proceedings of ASRU","first-page":"1","article-title":"On decoder-only architecture for speech-to-text and large language model integration","author":"Wu","year":"2023"},{"key":"10.1016\/j.csl.2025.101905_b54","series-title":"Proceedings of ASRU","first-page":"1","article-title":"Generative speech recognition error correction with large language models and task-activating prompting","author":"Yang","year":"2023"},{"key":"10.1016\/j.csl.2025.101905_b55","unstructured":"Yao, Z., Guo, L., Yang, X., Kang, W., Kuang, F., Yang, Y., Jin, Z., Lin, L., Povey, D., 2023. Zipformer: A faster and better encoder for automatic speech recognition. In: Proc. ICLR."},{"key":"10.1016\/j.csl.2025.101905_b56","series-title":"Web2Code: A large-scale webpage-to-code dataset and evaluation framework for multimodal LLMs","author":"Yun","year":"2024"},{"key":"10.1016\/j.csl.2025.101905_b57","series-title":"SpeechGPT: Empowering large language models with intrinsic cross-modal conversational abilities","author":"Zhang","year":"2023"},{"key":"10.1016\/j.csl.2025.101905_b58","doi-asserted-by":"crossref","unstructured":"Zusag, M., Wagner, L., Thallinger, B., 2024. CrisperWhisper: Accurate Timestamps on Verbatim Speech Transcriptions. In: Proceedings of INTERSPEECH. pp. 1265\u20131269.","DOI":"10.21437\/Interspeech.2024-731"}],"container-title":["Computer Speech &amp; Language"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230825001305?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0885230825001305?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,19]],"date-time":"2026-03-19T01:06:00Z","timestamp":1773882360000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0885230825001305"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,7]]},"references-count":58,"alternative-id":["S0885230825001305"],"URL":"https:\/\/doi.org\/10.1016\/j.csl.2025.101905","relation":{},"ISSN":["0885-2308"],"issn-type":[{"value":"0885-2308","type":"print"}],"subject":[],"published":{"date-parts":[[2026,7]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Do modern speech LLMs and re-scoring techniques improve bilingual ASR performance for Basque and Spanish in domain-specific contexts?","name":"articletitle","label":"Article Title"},{"value":"Computer Speech & Language","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.csl.2025.101905","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2025 The Authors. Published by Elsevier Ltd.","name":"copyright","label":"Copyright"}],"article-number":"101905"}}