{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T07:03:44Z","timestamp":1775199824073,"version":"3.50.1"},"reference-count":30,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,6]]},"DOI":"10.1109\/asru65441.2025.11434664","type":"proceedings-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:48:04Z","timestamp":1775159284000},"page":"1-4","source":"Crossref","is-referenced-by-count":0,"title":["Efficient Deployment of Large Speech Recognition Models on GPU"],"prefix":"10.1109","author":[{"given":"Yuekai","family":"Zhang","sequence":"first","affiliation":[{"name":"Nvidia Corporation"}]},{"given":"Shuang","family":"Yu","sequence":"additional","affiliation":[{"name":"Nvidia Corporation"}]},{"given":"Junjie","family":"Lai","sequence":"additional","affiliation":[{"name":"Nvidia Corporation"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref2","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"International conference on machine 
learning.","author":"Radford"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389676"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2024-1194"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2025-1062"},{"key":"ref6","article-title":"Seamless: Multilingual expressive and streaming speech translation","author":"Barrault","year":"2023","journal-title":"arXiv preprint arXiv:2312.05187"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2294"},{"key":"ref8","article-title":"Qwen-audio: Advancing universal audio understanding via unified large-scale audio-language models","author":"Chu","year":"2023","journal-title":"arXiv preprint arXiv:2311.07919"},{"key":"ref9","article-title":"Qwen2-audio technical report","volume-title":"arXiv preprint arXiv:2407.10759","author":"Chu","year":"2024"},{"key":"ref10","article-title":"Osum: Advancing open speech understanding models with limited resources in academia","author":"Geng","year":"2025","journal-title":"arXiv preprint arXiv:2501.13306"},{"key":"ref11","article-title":"Seed-asr: Understanding diverse speech and contexts with LLM-based speech recognition","author":"Bai","year":"2024","journal-title":"arXiv preprint arXiv:2407.04675"},{"key":"ref12","article-title":"Fireredasr: Open-source industrial-grade mandarin speech recognition models from encoder-decoder to LLM integration","author":"Xu","year":"2025","journal-title":"arXiv preprint arXiv:2501.14350"},{"key":"ref13","article-title":"Distil-whisper: Robust knowledge distillation via large-scale pseudo labelling","author":"Gandhi","year":"2023","journal-title":"arXiv preprint arXiv:2311.00430"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/SLT54892.2023.10022656"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389701"},{"key":"ref16","article-title":"Moonshine: Speech recognition for live transcription and voice 
commands","author":"Jeffries","year":"2024","journal-title":"arXiv preprint arXiv:2410.15608"},{"key":"ref17","article-title":"Training and inference efficiency of encoder-decoder speech models","author":"\u017belasko","year":"2025","journal-title":"arXiv preprint arXiv:2503.05931"},{"key":"ref18","article-title":"Sherpa-onnx","year":"2025"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.23919\/APSIPAASC55919.2022.9979824"},{"key":"ref20","article-title":"Faster whisper","year":"2025","journal-title":"SYSTRAN"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2023-78"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1145\/3600006.3613165"},{"key":"ref23","article-title":"Triton","year":"2025","journal-title":"Nvidia"},{"key":"ref24","article-title":"Tensorrt-llm","year":"2025"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICSDA.2017.8384449"},{"key":"ref26","article-title":"Aishell-2: Transforming mandarin asr research into industrial scale","author":"Du","year":"2018","journal-title":"arXiv preprint arXiv:1808.10583"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746682"},{"key":"ref28","article-title":"Funaudiollm: Voice understanding and generation foundation models for natural interaction between humans and LLMs","author":"An","year":"2024","journal-title":"arXiv preprint arXiv:2407.04051"},{"key":"ref29","first-page":"2024","article-title":"Open automatic speech recognition leaderboard","volume":"27","author":"Srivastav","year":"2023","journal-title":"Verkkoaineisto.j huggingface. co\/spaces\/hf-audio\/open_asr_leaderboard s. 
Luettu"},{"key":"ref30","article-title":"Flashattention-2: Faster attention with better parallelism and work partitioning","author":"Dao","year":"2023","journal-title":"arXiv preprint arXiv:2307.08691"}],"event":{"name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,12,6]]},"end":{"date-parts":[[2025,12,10]]}},"container-title":["2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11434577\/11433836\/11434664.pdf?arnumber=11434664","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T04:57:56Z","timestamp":1775192276000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11434664\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":30,"URL":"https:\/\/doi.org\/10.1109\/asru65441.2025.11434664","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]}}}