{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,12]],"date-time":"2026-05-12T16:24:37Z","timestamp":1778603077740,"version":"3.51.4"},"reference-count":24,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T00:00:00Z","timestamp":1770508800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T00:00:00Z","timestamp":1770508800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Supercomput"],"DOI":"10.1007\/s11227-026-08256-4","type":"journal-article","created":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T07:50:59Z","timestamp":1770537059000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Scalable RL-based data generation and multi-resolution architecture for code-switched speech recognition: a high-performance computing approach"],"prefix":"10.1007","volume":"82","author":[{"given":"Hemant","family":"Palivela","sequence":"first","affiliation":[]},{"given":"Meera","family":"Narvekar","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,2,8]]},"reference":[{"key":"8256_CR1","unstructured":"John Adams et\u00a0al. Crossvoice: A cascade-based s2st system with cross-lingual prosody transfer. arXiv preprint arXiv:2406.00021, 2024"},{"key":"8256_CR2","doi-asserted-by":"crossref","unstructured":"Babu A, Wang C, Tjandra A, Lakhotia K, Xu Q, Goyal N et\u00a0al (2021) Xls-r: Self-supervised cross-lingual speech representation learning at scale. In: Proceedings Interspeech, pages 2278\u20132282","DOI":"10.21437\/Interspeech.2022-143"},{"key":"8256_CR3","unstructured":"Baevski A, Zhou Y, Mohamed A, Auli M (2020) wav2vec 2.0: A framework for self-supervised learning of speech representations. Adv Neural Inf Process Syst, 33:12449\u201312460"},{"issue":"6","key":"8256_CR4","doi-asserted-by":"publisher","first-page":"1505","DOI":"10.1109\/JSTSP.2022.3188113","volume":"16","author":"S Chen","year":"2022","unstructured":"Chen S, Wang C, Zhengyang Chen YW, Liu S, Chen Z et al (2022) Wavlm: large-scale self-supervised pre-training for full stack speech processing. IEEE J Selected Topics Signal Process 16(6):1505\u20131518","journal-title":"IEEE J Selected Topics Signal Process"},{"key":"8256_CR5","unstructured":"Dao T (2023) Flashattention-2: Faster attention with better parallelism and work partitioning. Adv Neural Inf Process Syst"},{"key":"8256_CR6","first-page":"16344","volume":"35","author":"Fast and memory-efficient exact attention with io-awareness","year":"2022","unstructured":"Fast and memory-efficient exact attention with io-awareness (2022) Tri Dao, Daniel Y Fu, Stefano Ermon, Atri Rudra, and Christopher R\u00e9. Flashattention. Adv Neural Inf Process Syst 35:16344\u201316359","journal-title":"Adv Neural Inf Process Syst"},{"key":"8256_CR7","doi-asserted-by":"crossref","unstructured":"Do\u011fru\u00f6z AS, Sitaram S, Bullock BE, Toribio AJ (2021) A survey of code-switching: Linguistic and social perspectives for language technologies. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics, 1654\u20131666","DOI":"10.18653\/v1\/2021.acl-long.131"},{"key":"8256_CR8","unstructured":"Gandhi S, von Platen P, Rush AM (2024) Distil-whisper: Robust knowledge distillation via large-scale pseudo labelling. arXiv preprint arXiv:2311.00430, 2023. Updated March"},{"key":"8256_CR9","unstructured":"Ganin Y, Ustinova E, Ajakan H, Germain P, Larochelle H, Laviolette F, Marchand M, Lempitsky V (2015) Domain-adversarial training of neural networks. arXiv preprint arXiv: 1505.07818"},{"key":"8256_CR10","doi-asserted-by":"crossref","unstructured":"Graves A, Fern\u00e1ndez S, Gomez F, Schmidhuber J (2006) Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd International Conference on Machine Learning, 369\u2013376","DOI":"10.1145\/1143844.1143891"},{"key":"8256_CR11","doi-asserted-by":"crossref","unstructured":"Gulati A, Qin J, Chiu C-C, Parmar N, Zhang Y, Yu J, Han W et\u00a0al (2020) Conformer: Convolution-augmented transformer for speech recognition. In: Proceedings Interspeech, 5036\u20135040","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"8256_CR12","doi-asserted-by":"publisher","first-page":"3451","DOI":"10.1109\/TASLP.2021.3122291","volume":"29","author":"W-N Hsu","year":"2021","unstructured":"Hsu W-N, Bolte B, Tsai Y-HH, Lakhotia K, Salakhutdinov R, Mohamed A (2021) Hubert: self-supervised speech representation learning by masked prediction of hidden units. IEEE\/ACM Trans Audio Speech Lang Process 29:3451\u20133460","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"8256_CR13","doi-asserted-by":"crossref","unstructured":"Kim K, Wu F, Peng Y, Pan J, Sriram P, Watanabe S (2023) E-branchformer: Branchformer with enhanced merging for automatic speech recognition. In 2022 IEEE Spoken Lang Technol Workshop (SLT), 84\u201391","DOI":"10.1109\/SLT54892.2023.10022656"},{"key":"8256_CR14","first-page":"9361","volume":"35","author":"S Kim","year":"2022","unstructured":"Kim S, Gholami A, Shaw A, Lee N, Mangalam K, Malik J, Mahoney MW, Keutzer K (2022) Squeezeformer: an efficient transformer for automatic speech recognition. Adv Neural Inf Process Syst 35:9361\u20139373","journal-title":"Adv Neural Inf Process Syst"},{"key":"8256_CR15","volume-title":"Mobileconformer: Compressed navigable conformer for efficient speech recognition","author":"D Mehta","year":"2023","unstructured":"Mehta D, Hery D, Fathan A et al (2023) Mobileconformer: Compressed navigable conformer for efficient speech recognition. In Proc, Interspeech"},{"key":"8256_CR16","unstructured":"Micikevicius P, Stosic D, Burgess N et\u00a0al (2022) Fp8 formats for deep learning. arXiv preprint arXiv:2209.05433"},{"key":"8256_CR17","unstructured":"Peng Y, Dalmia S, Lane I, Watanabe S (2022) Branchformer: Parallel mlp-attention architectures to capture local and global context for speech recognition. In: International Conference on Machine Learning (ICML), 17627\u201317643"},{"key":"8256_CR18","doi-asserted-by":"crossref","unstructured":"Peng Y, Sudo Y, Shakeel M, Watanabe S (2024) Owsm v3.1: Better and faster open whisper-style speech models based on e-branchformer. In: Proceedings Interspeech","DOI":"10.21437\/Interspeech.2024-1194"},{"key":"8256_CR19","unstructured":"Pratap V, Tjandra A, Shi B, Tomasello P et\u00a0al (2023) Scaling speech technology to 1,000+ languages. arXiv preprint arXiv:2305.13516"},{"key":"8256_CR20","unstructured":"Radford A, Kim JW, Xu T, Brockman G, McLeavey C, Sutskever I (2023) Robust speech recognition via large-scale weak supervision. In: International Conference on Machine Learning (ICML), 28492\u201328518"},{"key":"8256_CR21","volume":"218","author":"H Shi","year":"2024","unstructured":"Shi H, Cui C, Wang L (2024) Language-specific boundary learning for improving mandarin-english code-switching speech recognition. Appl Acoust 218:109886","journal-title":"Appl Acoust"},{"issue":"8","key":"8256_CR22","first-page":"1138","volume":"27","author":"S Sitaram","year":"2019","unstructured":"Sitaram S, Chandu KR, Rallabandi SK, Black AW (2019) A survey of code-switched speech and language processing. IEEE\/ACM Trans Audio Speech Lang Process 27(8):1138\u20131158","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"8256_CR23","unstructured":"Zhang R, Liu T et\u00a0al (2024a) Simplefsdp: Simpler fully sharded data parallel with torch.compile. arXiv preprint arXiv:2411.00284"},{"key":"8256_CR24","unstructured":"Zhang Y, Ruan W, Fan E, Wang Y et\u00a0al (2023) Google usm: Scaling automatic speech recognition beyond 100 languages. In: ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), pages 1\u20135, 2024b. Originally released as arXiv:2303.01037 in"}],"container-title":["The Journal of Supercomputing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-026-08256-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11227-026-08256-4","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11227-026-08256-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,8]],"date-time":"2026-02-08T07:51:03Z","timestamp":1770537063000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11227-026-08256-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,2,8]]},"references-count":24,"journal-issue":{"issue":"3","published-online":{"date-parts":[[2026,2]]}},"alternative-id":["8256"],"URL":"https:\/\/doi.org\/10.1007\/s11227-026-08256-4","relation":{},"ISSN":["1573-0484"],"issn-type":[{"value":"1573-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026,2,8]]},"assertion":[{"value":"26 November 2025","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 January 2026","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 February 2026","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"137"}}