{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:28:23Z","timestamp":1775230103126,"version":"3.50.1"},"reference-count":36,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T00:00:00Z","timestamp":1702684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,12,16]],"date-time":"2023-12-16T00:00:00Z","timestamp":1702684800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,12,16]]},"DOI":"10.1109\/asru57964.2023.10389617","type":"proceedings-article","created":{"date-parts":[[2024,1,19]],"date-time":"2024-01-19T13:38:40Z","timestamp":1705671520000},"page":"1-8","source":"Crossref","is-referenced-by-count":11,"title":["Zero-Shot Domain-Sensitive Speech Recognition with Prompt-Conditioning Fine-Tuning"],"prefix":"10.1109","author":[{"given":"Feng-Ting","family":"Liao","sequence":"first","affiliation":[{"name":"MediaTek Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yung-Chieh","family":"Chan","sequence":"additional","affiliation":[{"name":"MediaTek Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yi-Chang","family":"Chen","sequence":"additional","affiliation":[{"name":"MediaTek Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chan-Jan","family":"Hsu","sequence":"additional","affiliation":[{"name":"MediaTek Research"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Da-Shan","family":"Shiu","sequence":"additional","affiliation":[{"name":"MediaTek Research"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.amsu.2020.09.015"},{"key":"ref2","article-title":"Improving callsign recognition with air-surveillance data in air-traffic communication","author":"Nigmatulina","year":"2021","journal-title":"arXiv:2108.12156 [cs, eess]"},{"key":"ref3","article-title":"Earnings-22: A Practical Benchmark for Accents in the Wild","author":"Rio","year":"2022","journal-title":"arXiv:2203.15591 [cs]"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-824"},{"key":"ref5","article-title":"Language models are few-shot learners","author":"Brown","journal-title":"arXiv, no. NeurIPS, 2020, arXiv: 2005.14165"},{"key":"ref6","article-title":"GPT-4 Technical Report","volume-title":"arXiv:2303.08774 [cs]","year":"2023"},{"key":"ref7","article-title":"Fine-tuned Language Models Are Zero-Shot Learners","author":"Wei","year":"2021","journal-title":"arXiv: 2109.01652"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1162\/tacl_a_00468"},{"key":"ref9","first-page":"2020","article-title":"Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer","volume-title":"arXiv: 1910.10683v3 Publication Title: Journal of Machine Learning Research","volume":"21","author":"Raffel"},{"key":"ref10","article-title":"Robust Speech Recognition via Large-Scale Weak Supervision","author":"Radford","year":"2022","journal-title":"arXiv:2212.04356 [cs, eess]"},{"issue":"Nips","key":"ref11","first-page":"5999","article-title":"Attention is all you need","volume":"2017-Decem","author":"Vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref12","article-title":"Language Models are Unsupervised Multitask Learners","author":"Radford","year":"2018"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1965"},{"key":"ref14","year":"2019","journal-title":"Medical Speech, Transcription, and Intent"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1007\/s10579-019-09449-5"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1033"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10557"},{"key":"ref18","article-title":"Adaptable End-toEnd ASR Models using Replaceable Internal LMs and Residual Softmax","author":"Deng","year":"2023","journal-title":"arXiv:2302.08579 [cs, eess]"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2032"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10610"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9414800"},{"key":"ref22","article-title":"Attention Based Models for Speech Recognition","author":"Chorowski","year":"2015","journal-title":"arXiv:1506.07503 [cs, stat]"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10095469"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746126"},{"key":"ref25","article-title":"Fast Contextual Adaptation with Neural Associative Memory for OnDevice Personalized Speech Recognition","author":"Munkhdalai","year":"2021","journal-title":"arXiv:2110.02220 [cs, eess]"},{"key":"ref26","article-title":"Scaling Laws for Neural Language Models","author":"Kaplan","year":"2020","journal-title":"arXiv: 2001.08361"},{"key":"ref27","volume-title":"Stanford Alpaca: An Instruction-following LLaMA model","author":"Taori","year":"2023"},{"key":"ref28","article-title":"Tiny Stories: How Small Can Language Models Be and Still Speak Coherent English?","author":"Eldan","year":"2023","journal-title":"arXiv:2305.07759 [cs]"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.2305016120"},{"key":"ref30","article-title":"Classifier-Free Diffusion Guidance","author":"Ho","year":"2022","journal-title":"arXiv:2207.12598 [cs]"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1611835114"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-13"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.3389\/fpsyg.2022.778018"},{"key":"ref34","article-title":"Common Voice: A Massively-Multilingual Speech Corpus","author":"Ardila","year":"2020","journal-title":"arXiv:1912.06670 [cs]"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-78"}],"event":{"name":"2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","location":"Taipei, Taiwan","start":{"date-parts":[[2023,12,16]]},"end":{"date-parts":[[2023,12,20]]}},"container-title":["2023 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10388490\/10389614\/10389617.pdf?arnumber=10389617","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,1,23]],"date-time":"2024-01-23T11:43:58Z","timestamp":1706010238000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10389617\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,12,16]]},"references-count":36,"URL":"https:\/\/doi.org\/10.1109\/asru57964.2023.10389617","relation":{},"subject":[],"published":{"date-parts":[[2023,12,16]]}}}