{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:26:53Z","timestamp":1775230013545,"version":"3.50.1"},"reference-count":40,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,2]],"date-time":"2024-12-02T00:00:00Z","timestamp":1733097600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,2]]},"DOI":"10.1109\/slt61566.2024.10832222","type":"proceedings-article","created":{"date-parts":[[2025,1,16]],"date-time":"2025-01-16T18:31:27Z","timestamp":1737052287000},"page":"1039-1046","source":"Crossref","is-referenced-by-count":4,"title":["Stutter-Solver: End-To-End Multi-Lingual Dysfluency Detection"],"prefix":"10.1109","author":[{"given":"Xuanru","family":"Zhou","sequence":"first","affiliation":[{"name":"Zhejiang University"}]},{"given":"Cheol Jun","family":"Cho","sequence":"additional","affiliation":[{"name":"UC Berkeley"}]},{"given":"Ayati","family":"Sharma","sequence":"additional","affiliation":[{"name":"UC Berkeley"}]},{"given":"Brittany","family":"Morin","sequence":"additional","affiliation":[{"name":"UCSF"}]},{"given":"David","family":"Baquirin","sequence":"additional","affiliation":[{"name":"UCSF"}]},{"given":"Jet","family":"Vonk","sequence":"additional","affiliation":[{"name":"UCSF"}]},{"given":"Zoe","family":"Ezzes","sequence":"additional","affiliation":[{"name":"UCSF"}]},{"given":"Zachary","family":"Miller","sequence":"additional","affiliation":[{"name":"UCSF"}]},{"given":"Boon Lead","family":"Tee","sequence":"additional","affiliation":[{"name":"UCSF"}]},{"given":"Maria Luisa","family":"Gorno-Tempini","sequence":"additional","affiliation":[{"name":"UCSF"}]},{"given":"Jiachen","family":"Lian","sequence":"additional","affiliation":[{"name":"UC Berkeley"}]},{"given":"Gopala","family":"Anumanchipalli","sequence":"additional","affiliation":[{"name":"UC Berkeley"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389771"},{"key":"ref2","article-title":"Towards hierarchical spoken language dysfluency modeling","volume-title":"Proceedings of the 18th Conference of the European Chapter of the Association for Computational Linguistics","author":"Lian"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"ref4","volume-title":"ppa market"},{"key":"ref5","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"International Conference on Machine Learning","author":"Radford"},{"key":"ref6","article-title":"Google usm: Scaling automatic speech recognition beyond 100 languages","author":"Zhang","year":"2023","journal-title":"arXiv preprint arXiv:2303.01037"},{"key":"ref7","article-title":"Scaling speech technology to 1,000+ languages","author":"Pratap","year":"2023","journal-title":"arXiv preprint arXiv:2305.13516"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389642"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2011.07.065"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TECHPOS.2009.5412080"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2015.08.006"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ISCMI56532.2022.10068490"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053893"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3110146"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2019.101052"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746638"},{"key":"ref17","first-page":"372","article-title":"Automatic recognition of repetitions and prolongations in stuttered speech","volume-title":"Proceedings of the first World Congress on fluency disorders","volume":"2","author":"Howell"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10094692"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-1855"},{"key":"ref20","article-title":"Articulatory encodec: Vocal tract kinematics as a codec for speech","author":"Cho","year":"2024","journal-title":"arXiv preprint arXiv:2406.12998"},{"key":"ref21","first-page":"5530","article-title":"Conditional variational autoencoder with adversarial learning for end-to-end text-to-speech","volume-title":"International Conference on Machine Learning","author":"Kim"},{"key":"ref22","article-title":"Cstr vctk corpus: English multi-speaker corpus for cstr voice cloning toolkit (version 0.92)","author":"Yamagishi","year":"2019","journal-title":"University of Edinburgh. The Centre for Speech Technology Research (CSTR)"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-755"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1212\/WNL.0b013e31821103e6"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/SLT48900.2021.9383558"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683143"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-3015"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2015.7178964"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1044\/1092-4388(2009\/07-0129)"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413520"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054362"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11233"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096401"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2316"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2023.3290423"},{"key":"ref37","article-title":"Contentvec: An improved self-supervised speech representation by disentangling speakers","author":"Qian","year":"2022","journal-title":"ICML"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747272"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11225"},{"key":"ref40","article-title":"Nansy++: Unified voice synthesis with neural analysis and synthesis","author":"Choi","year":"2022","journal-title":"ICLR"}],"event":{"name":"2024 IEEE Spoken Language Technology Workshop (SLT)","location":"Macao","start":{"date-parts":[[2024,12,2]]},"end":{"date-parts":[[2024,12,5]]}},"container-title":["2024 IEEE Spoken Language Technology Workshop (SLT)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10830790\/10830793\/10832222.pdf?arnumber=10832222","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,20]],"date-time":"2025-01-20T18:39:19Z","timestamp":1737398359000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10832222\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,2]]},"references-count":40,"URL":"https:\/\/doi.org\/10.1109\/slt61566.2024.10832222","relation":{},"subject":[],"published":{"date-parts":[[2024,12,2]]}}}