{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,15]],"date-time":"2026-03-15T16:57:37Z","timestamp":1773593857335,"version":"3.50.1"},"reference-count":65,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","license":[{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,1,1]],"date-time":"2023-01-01T00:00:00Z","timestamp":1672531200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"name":"Grants NIH-NIDCD","award":["R01DC006859"],"award-info":[{"award-number":["R01DC006859"]}]},{"name":"NIH-NIDCD","award":["DC019475"],"award-info":[{"award-number":["DC019475"]}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE\/ACM Trans. Audio Speech Lang. Process."],"published-print":{"date-parts":[[2023]]},"DOI":"10.1109\/taslp.2023.3261753","type":"journal-article","created":{"date-parts":[[2023,3,31]],"date-time":"2023-03-31T17:52:22Z","timestamp":1680285142000},"page":"1348-1359","source":"Crossref","is-referenced-by-count":8,"title":["Robust Vocal Quality Feature Embeddings for Dysphonic Voice Detection"],"prefix":"10.1109","volume":"31","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6419-2038","authenticated-orcid":false,"given":"Jianwei","family":"Zhang","sequence":"first","affiliation":[{"name":"School of Electrical, Computer and Energy Engineering, Arizona State University, Tempe, AZ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Julie","family":"Liss","sequence":"additional","affiliation":[{"name":"College of Health Solutions, Arizona State University, Tempe, AZ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-7143-4429","authenticated-orcid":false,"given":"Suren","family":"Jayasuriya","sequence":"additional","affiliation":[{"name":"School of Arts, Media and Engineering and the School of Electrical, Computer and Energy Engineering, Arizona State University, Tempe, AZ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Visar","family":"Berisha","sequence":"additional","affiliation":[{"name":"College of Health Solutions and School of Electrical, Computer and Energy Engineering, Arizona State University, Tempe, AZ, USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvoice.2012.07.012"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1016\/j.anorl.2011.04.004"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1097\/01.mlg.0000232537.58310.22"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/S0892-1997(96)80028-2"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1097\/00020840-200306000-00002"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1177\/0194599817751030"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1044\/1058-0360(2010\/09-0105)"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-642-35292-8_11"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICMIC.2015.7409479"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1155\/2015\/956249"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.compbiomed.2015.07.026"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2816338"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2021-1507"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.engappai.2018.09.018"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2020.107528"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1122"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1159\/000511671"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IWOBI.2017.7985525"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1351"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2018.2856238"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.3390\/app10113723"},{"key":"ref22","article-title":"Saarbruecken voice database","author":"Woldert-Jokisz","year":"2007"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.3109\/14015439.2010.528788"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.5555\/1756006.1953024"},{"key":"ref25","article-title":"BERT: Pre-training of deep bidirectional transformers for language understanding","author":"Devlin","year":"2018"},{"key":"ref26","first-page":"1877","article-title":"Language models are few-shot learners","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Brown","year":"2020"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2014.6854363"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/LSP.2015.2420092"},{"key":"ref29","first-page":"6827","article-title":"What makes for good views for contrastive learning","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"33","author":"Tian","year":"2020"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.5244\/C.30.119"},{"key":"ref31","article-title":"Deep speaker: An end-to-end neural speaker embedding system","author":"Li","year":"2017"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2016.7472652"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462665"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00975"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.5555\/3524938.3525087"},{"key":"ref36","first-page":"5180","article-title":"Style tokens: Unsupervised style modeling, control and transfer in end-to-end speech synthesis","volume-title":"Proc. Int. Conf. Mach. Learn.","author":"Wang","year":"2018"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/fg52635.2021.9666944"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2365"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1177\/0956797620963619"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413391"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1007\/s00521-018-3464-7"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1016\/j.bspc.2016.08.002"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2020.2995737"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1145\/1873951.1874246"},{"key":"ref45","article-title":"Praat: Doing phonetics by computer","author":"Boersma","year":"2021"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.3390\/s17020267"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1038\/s41746-021-00521-5"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2016.2626316"},{"key":"ref49","article-title":"DiffWave: A versatile diffusion model for audio synthesis","volume-title":"Proc. Int. Conf. Learning Representations","author":"Kong","year":"2021"},{"key":"ref50","first-page":"8026","article-title":"Pytorch: An imperative style, high-performance deep learning library","volume-title":"Proc. Adv. Neural Inf. Process. Syst.","volume":"32","author":"Paszke","year":"2019"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1121\/1.4799597"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1109\/ICDSP.2009.5201259"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1073\/pnas.1612524113"},{"key":"ref54","article-title":"The IR data for vintage microphones"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.25080\/Majora-7b98e3ed-003"},{"key":"ref56","article-title":"Robust speech recognition via large-scale weak supervision","author":"Radford","year":"2022"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.5555\/1953048.2078195"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.309"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.21105\/joss.00861"},{"key":"ref60","first-page":"1","article-title":"Clinical examination of voice","volume":"5","author":"Hirano","year":"1981","journal-title":"Disord. Hum. Commun."},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2019.2956410"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1016\/j.apacoust.2021.108417"},{"key":"ref63","first-page":"12449","article-title":"wav2vec 2.0: A framework for self-supervised learning of speech representations","volume":"33","author":"Baevski","year":"2020","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref64","doi-asserted-by":"publisher","DOI":"10.1109\/BSN.2019.8771043"},{"key":"ref65","doi-asserted-by":"publisher","DOI":"10.1186\/s10194-021-01296-6"}],"container-title":["IEEE\/ACM Transactions on Audio, Speech, and Language Processing"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/6570655\/9970249\/10084549.pdf?arnumber=10084549","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,3,3]],"date-time":"2024-03-03T08:38:10Z","timestamp":1709455090000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10084549\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023]]},"references-count":65,"URL":"https:\/\/doi.org\/10.1109\/taslp.2023.3261753","relation":{},"ISSN":["2329-9290","2329-9304"],"issn-type":[{"value":"2329-9290","type":"print"},{"value":"2329-9304","type":"electronic"}],"subject":[],"published":{"date-parts":[[2023]]}}}