{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T07:00:54Z","timestamp":1775199654267,"version":"3.50.1"},"reference-count":23,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,12,6]],"date-time":"2025-12-06T00:00:00Z","timestamp":1764979200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,12,6]]},"DOI":"10.1109\/asru65441.2025.11434621","type":"proceedings-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T19:48:04Z","timestamp":1775159284000},"page":"1-7","source":"Crossref","is-referenced-by-count":0,"title":["Llasa+: Free Lunch for Accelerated and Streaming Llama-Based Speech Synthesis"],"prefix":"10.1109","author":[{"given":"Wenjie","family":"Tian","sequence":"first","affiliation":[{"name":"Northwestern Polytechnical University,Xi&#x2019;an,China"}]},{"given":"Xinfa","family":"Zhu","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Xi&#x2019;an,China"}]},{"given":"Hanke","family":"Xie","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Xi&#x2019;an,China"}]},{"given":"Zhen","family":"Ye","sequence":"additional","affiliation":[{"name":"Hong Kong University of Science and Technology,Hong Kong,China"}]},{"given":"Wei","family":"Xue","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Xi&#x2019;an,China"}]},{"given":"Lei","family":"Xie","sequence":"additional","affiliation":[{"name":"Northwestern Polytechnical University,Xi&#x2019;an,China"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.acl-long.313"},{"key":"ref2","article-title":"Cosyvoice 2: Scalable streaming speech synthesis with large language models","volume-title":"CoRR","volume":"abs\/2412.10117","author":"Du","year":"2024"},{"key":"ref3","article-title":"Seed-tts: A family of high-quality versatile speech generation models","author":"Anastassiou","year":"2024"},{"key":"ref4","article-title":"Llasa: Scaling train-time and inferencetime compute for llama-based speech synthesis","volume-title":"arXiv preprint arXiv:2502.04128","author":"Ye","year":"2025"},{"key":"ref5","article-title":"Mega-tts 2: Zero-shot text-to-speech with arbitrary length speech prompts","author":"Jiang","year":"2023","journal-title":"CoRR"},{"key":"ref6","article-title":"Spark-tts: An efficient llm-based text-to-speech model with single-stream decoupled speech tokens","author":"Wang","year":"2025","journal-title":"arXiv preprint arXiv:2503.01710"},{"key":"ref7","article-title":"Fireredtts: A foundation text-to-speech framework for industry-level generative speech applications","author":"Guo","year":"2024","journal-title":"arXiv preprint arXiv:2409.03283"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/2025.emnlp-main.989"},{"key":"ref9","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023","journal-title":"CoRR"},{"key":"ref10","article-title":"Cosyvoice: A scalable multilingual zero-shot text-to-speech synthesizer based on supervised semantic tokens","author":"Du","year":"2024","journal-title":"CoRR"},{"key":"ref11","article-title":"Moshi: a speech-text foundation model for real-time dialogue","author":"D\u00e9fossez","year":"2024","journal-title":"CoRR"},{"key":"ref12","article-title":"Baichuanaudio: A unified framework for end-to-end speech interaction","author":"Li","year":"2025","journal-title":"CoRR"},{"key":"ref13","article-title":"Freeze-omni: A smart and low latency speech-to-speech dialogue model with frozen LLM","author":"Wang","year":"2024","journal-title":"CoRR"},{"key":"ref14","article-title":"Glm-4-voice: Towards intelligent and human-like end-to-end spoken chatbot","volume":"abs\/2412.02612","author":"Zeng","year":"2024","journal-title":"CoRR"},{"key":"ref15","article-title":"Mini-omni2: Towards open-source gpt-4o with vision, speech and duplex capabilities","author":"Xie","year":"2024","journal-title":"CoRR"},{"key":"ref16","article-title":"OSUM: advancing open speech understanding models with limited resources in academia","author":"Geng","year":"2025","journal-title":"CoRR"},{"key":"ref17","article-title":"Kimi-audio technical report","author":"KimiTeam","year":"2025"},{"key":"ref18","article-title":"Qwen2.5-omni technical report","author":"Xu","year":"2025"},{"key":"ref19","article-title":"VALL-E 2: Neural codec language models are human parity zero-shot text to speech synthesizers","author":"Chen","year":"2024","journal-title":"CoRR"},{"key":"ref20","article-title":"Accelerating codec-based speech synthesis with multi-token prediction and speculative decoding","volume-title":"CoRR","author":"Nguyen","year":"2024"},{"key":"ref21","article-title":"Deepseek-v3 technical report","author":"DeepSeek-AI","year":"2024"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2441"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"}],"event":{"name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Honolulu, HI, USA","start":{"date-parts":[[2025,12,6]]},"end":{"date-parts":[[2025,12,10]]}},"container-title":["2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11434577\/11433836\/11434621.pdf?arnumber=11434621","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T04:57:24Z","timestamp":1775192244000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11434621\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,12,6]]},"references-count":23,"URL":"https:\/\/doi.org\/10.1109\/asru65441.2025.11434621","relation":{},"subject":[],"published":{"date-parts":[[2025,12,6]]}}}