{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:27:22Z","timestamp":1775230042370,"version":"3.50.1"},"reference-count":86,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"5","license":[{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,7,1]],"date-time":"2025-07-01T00:00:00Z","timestamp":1751328000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"UC Noyce Initiative"},{"name":"Society of Hellman Fellows"},{"name":"NIH\/NIDCD"},{"name":"Schwab Innovation"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE J. Sel. Top. Signal Process."],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1109\/jstsp.2025.3579972","type":"journal-article","created":{"date-parts":[[2025,6,16]],"date-time":"2025-06-16T14:58:39Z","timestamp":1750085919000},"page":"810-826","source":"Crossref","is-referenced-by-count":4,"title":["Automatic Detection of Articulatory-Based Disfluencies in Primary Progressive Aphasia"],"prefix":"10.1109","volume":"19","author":[{"ORCID":"https:\/\/orcid.org\/0009-0002-3556-2014","authenticated-orcid":false,"given":"Jiachen","family":"Lian","sequence":"first","affiliation":[{"name":"University of California, Berkeley, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-7483-6604","authenticated-orcid":false,"given":"Xuanru","family":"Zhou","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0000-3899-3544","authenticated-orcid":false,"given":"Chenxu","family":"Guo","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zongli","family":"Ye","sequence":"additional","affiliation":[{"name":"Zhejiang University, Hangzhou, Zhejiang, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zoe","family":"Ezzes","sequence":"additional","affiliation":[{"name":"University of California, San Francisco, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jet M.J.","family":"Vonk","sequence":"additional","affiliation":[{"name":"University of California, San Francisco, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-9562-4063","authenticated-orcid":false,"given":"Brittany","family":"Morin","sequence":"additional","affiliation":[{"name":"University of California, San Francisco, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Baquirin","sequence":"additional","affiliation":[{"name":"University of California, San Francisco, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5991-3053","authenticated-orcid":false,"given":"Zachary","family":"Miller","sequence":"additional","affiliation":[{"name":"University of California, San Francisco, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Maria Luisa","family":"Gorno-Tempini","sequence":"additional","affiliation":[{"name":"University of California, San Francisco, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9714-7740","authenticated-orcid":false,"given":"Gopala Krishna","family":"Anumanchipalli","sequence":"additional","affiliation":[{"name":"University of California, Berkeley, CA, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","first-page":"101818","article-title":"SSDM: Scalable speech dysfluency modeling","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"37","author":"Lian","year":"2024"},{"key":"ref2","article-title":"Towards audio language modeling-an overview","author":"Wu","year":"2024"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1044\/2024_AJSLP-23-00208"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1080\/02687038.2023.2244728"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1080\/02687039808249565"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1044\/2021_JSLHR-21-00484"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389771"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1080\/02687038.2023.2171261"},{"issue":"2","key":"ref9","first-page":"2157","article-title":"Classification of speech dysfluencies with MFCC and LPCC features","volume-title":"Expert Syst. Appl.","volume":"39","author":"Ai","year":"2012"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TECHPOS.2009.5412080"},{"key":"ref11","first-page":"104","article-title":"Automatic classification of speech dysfluencies in continuous speech based on similarity measures and morphological image processing tools","volume-title":"Biomed. Signal Process. Control","volume":"23","author":"Esmaili","year":"2016"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ISCMI56532.2022.10068490"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2246"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3110146"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2019.101052"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746638"},{"key":"ref17","first-page":"60","article-title":"Automatic dysfluency detection in dysarthric speech using deep belief networks","volume-title":"Proc. 6th Workshop Speech Lang. Process. Assistive Technol.","author":"Oue","year":"2015"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2022-10908"},{"key":"ref19","first-page":"372","article-title":"Automatic recognition of repetitions and prolongations in stuttered speech","volume-title":"Proc. 1st World Congr. Fluency Disord.","volume":"2","author":"Howell","year":"1995"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/wocci.2017-1"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICIAS.2007.4658401"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1016\/j.csl.2023.101519"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2023-2026"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/ICACCI.2018.8554455"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10094692"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2120"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-2293"},{"key":"ref28","article-title":"Enhancing ASR for stuttered speech with limited data using detect and pass","author":"Shonibare","year":"2022"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-204"},{"key":"ref30","first-page":"539","article-title":"Towards hierarchical spoken language disfluency modeling","volume-title":"Proc. 18th Conf. Euro. Chapter Assoc. Comput. Linguistics","volume":"1","author":"Lian","year":"2024"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-1855"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/SLT61566.2024.10832222"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2024-1855"},{"key":"ref34","article-title":"Learning audio-visual speech representation by masked multimodal cluster prediction","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Shi","year":"2022"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389642"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10096401"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/jstsp.2024.3497655"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2316"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/PROC.1976.10154"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/S0095-4470(19)30376-6"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1159\/000261913"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.3182\/20100901-3-IT-2016.00302"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1121\/1.4812765"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11233"},{"key":"ref45","article-title":"Llama: Open and efficient foundation language models","author":"Touvron","year":"2023"},{"key":"ref46","article-title":"Listen, think, and understand","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Gong","year":"2024"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2011-316"},{"key":"ref49","article-title":"Categorical reparameterization with gumbel-softmax","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Jang","year":"2017"},{"key":"ref50","article-title":"Flow matching for generative modeling","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Lipman","year":"2023"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.3362\/0262-8104.2002.009"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1145\/1143844.1143891"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1145\/322033.322044"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01353"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2024-731"},{"key":"ref57","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Baevski","year":"2020"},{"key":"ref58","first-page":"271","article-title":"CSTR VCTK corpus: English multi-speaker corpus for CSTR voice cloning toolkit (version 0.92)","author":"Yamagishi","year":"2019"},{"issue":"140","key":"ref59","first-page":"1","article-title":"Exploring the limits of transfer learning with a unified text-to-text transformer","volume-title":"J. Mach. Learn. Res.","volume":"21","author":"Raffel","year":"2020"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.324"},{"key":"ref61","first-page":"28492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Radford","year":"2023"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9054362"},{"key":"ref63","doi-asserted-by":"publisher","DOI":"10.1212\/WNL.0b013e31821103e6"},{"key":"ref64","article-title":"Salmonn: Towards generic hearing abilities for large language models","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Tang","year":"2024"},{"key":"ref65","article-title":"GPT-4 technical report","author":"OpenAI","year":"2023"},{"key":"ref66","article-title":"Gpt4-o","year":"2024"},{"key":"ref67","article-title":"SALMONN: Towards generic hearing abilities for large language models","volume-title":"Proc. 12th Int. Conf. Learn. Representations","author":"Tang","year":"2024"},{"key":"ref68","article-title":"The uclass archive of stuttered speech","volume-title":"J. Speech Lang. Hear. Res.","volume":"52","author":"Howell","year":"2009"},{"key":"ref69","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-16270-1_35"},{"key":"ref70","first-page":"1526","article-title":"Libritts: A corpus derived from librispeech for text-to-speech","volume-title":"Proc. INTERSPEECH","author":"Zen","year":"2019"},{"key":"ref71","first-page":"19594","article-title":"Styletts 2: Towards human-level text-to-speech through style diffusion and adversarial training with large speech language models","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"36","author":"Li","year":"2023"},{"key":"ref72","doi-asserted-by":"publisher","DOI":"10.21105\/joss.03958"},{"key":"ref73","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1386"},{"key":"ref74","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2023-78"},{"key":"ref75","first-page":"5530","article-title":"Conditional variational autoencoder with adversarial learning for end-to-end text-to-speech","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Kim","year":"2021"},{"key":"ref76","article-title":"Wavlm-ctc-hugginface","year":"2021"},{"key":"ref77","doi-asserted-by":"publisher","DOI":"10.1109\/SLT54892.2023.10022827"},{"key":"ref78","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2008.79"},{"key":"ref79","doi-asserted-by":"publisher","DOI":"10.1145\/3503250"},{"key":"ref80","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3207050"},{"key":"ref81","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.74"},{"key":"ref82","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1706.03762"},{"key":"ref83","article-title":"Fastspeech 2: Fast and high-quality end-to-end text to speech","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Ren","year":"2021"},{"key":"ref84","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389742"},{"issue":"2","key":"ref85","article-title":"LORA: Low-rank adaptation of large language models","volume-title":"Proc. Int. Conf. Learn. Representations","volume":"1","author":"Hu","year":"2022"},{"key":"ref86","article-title":"Adam: A method for stochastic optimization","volume-title":"Proc. Int. Conf. Learn. Representations","author":"Kingma","year":"2015"}],"container-title":["IEEE Journal of Selected Topics in Signal Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/4200690\/11275987\/11036667.pdf?arnumber=11036667","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,2,12]],"date-time":"2026-02-12T20:59:40Z","timestamp":1770929980000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11036667\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,7]]},"references-count":86,"journal-issue":{"issue":"5"},"URL":"https:\/\/doi.org\/10.1109\/jstsp.2025.3579972","relation":{},"ISSN":["1932-4553","1941-0484"],"issn-type":[{"value":"1932-4553","type":"print"},{"value":"1941-0484","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,7]]}}}