{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,20]],"date-time":"2025-11-20T05:58:01Z","timestamp":1763618281981,"version":"3.45.0"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"type":"print","value":"9783031705656"},{"type":"electronic","value":"9783031705663"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-70566-3_3","type":"book-chapter","created":{"date-parts":[[2024,8,31]],"date-time":"2024-08-31T18:29:51Z","timestamp":1725128991000},"page":"24-35","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Dysphonia Diagnosis Using Self-supervised Speech Models in\u00a0Mono and\u00a0Cross-Lingual Settings"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6449-7501","authenticated-orcid":false,"given":"Dosti","family":"Aziz","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7361-4260","authenticated-orcid":false,"given":"D\u00e1vid","family":"Sztah\u00f3","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,27]]},"reference":[{"key":"3_CR1","doi-asserted-by":"publisher","first-page":"6961","DOI":"10.1109\/ACCESS.2017.2696056","volume":"6","author":"A AlNasheri","year":"2017","unstructured":"AlNasheri, A., et al.: Voice pathology detection and classification using auto-correlation and entropy features in different frequency regions. IEEE Access 6, 6961\u20136974 (2017). https:\/\/doi.org\/10.1109\/ACCESS.2017.2696056","journal-title":"IEEE Access"},{"key":"3_CR2","doi-asserted-by":"publisher","unstructured":"Amami, R., Amami, R., Trabelsi, C., Mabrouk, S.H., Khalil, H.A.: A robust voice pathology detection system based on the combined BiLSTM\u2013CNN architecture. 1. 29(2), 202\u2013210 (2023). https:\/\/doi.org\/10.13164\/mendel.2023.2.202","DOI":"10.13164\/mendel.2023.2.202"},{"key":"3_CR3","unstructured":"Aronson, A.E.: Clinical Voice Disorders: An Interdisciplinary Approach. Thieme (1990)"},{"key":"3_CR4","doi-asserted-by":"publisher","unstructured":"Aziz, D., D\u00e1vid, S.: Multitask and transfer learning approach for joint classification and severity estimation of dysphonia. IEEE J. Transl. Eng. Health Med. 12, 233\u2013244 (2023). https:\/\/doi.org\/10.1109\/JTEHM.2023.3340345","DOI":"10.1109\/JTEHM.2023.3340345"},{"key":"3_CR5","doi-asserted-by":"publisher","unstructured":"Baevski, A., Hsu, W.N., Xu, Q., Babu, A., Gu, J., Auli, M.: data2vec: A General Framework for Self-supervised Learning in Speech, Vision and Language. arXiv (2022). https:\/\/doi.org\/10.48550\/arXiv.2202.03555","DOI":"10.48550\/arXiv.2202.03555"},{"key":"3_CR6","doi-asserted-by":"publisher","unstructured":"Baevski, A., Zhou, H., Mohamed, A., Auli, M.: wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations. arXiv (2020). https:\/\/doi.org\/10.48550\/arXiv.2006.11477","DOI":"10.48550\/arXiv.2006.11477"},{"key":"3_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2023.102952","volume":"152","author":"S Barbon","year":"2023","unstructured":"Barbon, S., Guido, R.C., Aguiar, G.J., Santana, E.J., Proen\u00e7a, M.L., Patil, H.A.: Multiple voice disorders in the same individual: investigating handcrafted features, multi-label classification algorithms, and base-learners. Speech Commun. 152, 102952 (2023). https:\/\/doi.org\/10.1016\/j.specom.2023.102952","journal-title":"Speech Commun."},{"key":"3_CR8","doi-asserted-by":"publisher","unstructured":"Cesarini, V., et al.: Machine learning-based study of dysphonic voices for the identification and differentiation of vocal cord paralysis and vocal nodules. In: Proceedings of the 15th International Joint Conference on Biomedical Engineering Systems and Technologies (BIOSTEC 2022), vol.\u00a04, pp. 265\u2013272. SCITEPRESS, Portugal (2022). https:\/\/doi.org\/10.5220\/0010913800003123","DOI":"10.5220\/0010913800003123"},{"issue":"2","key":"3_CR9","doi-asserted-by":"publisher","first-page":"279","DOI":"10.1111\/1460-6984.12783","volume":"58","author":"Z Chen","year":"2023","unstructured":"Chen, Z., Zhu, P., Qiu, W., Guo, J., Li, Y.: Deep learning in automatic detection of dysphonia: comparing acoustic features and developing a generalizable framework. Int. J. Lang. Commun. Disord. 58(2), 279\u2013294 (2023). https:\/\/doi.org\/10.1111\/1460-6984.12783","journal-title":"Int. J. Lang. Commun. Disord."},{"key":"3_CR10","doi-asserted-by":"publisher","unstructured":"Devlin, J., Chang, M.W., Lee, K., Toutanova, K.: BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. arXiv (2018). https:\/\/doi.org\/10.48550\/arXiv.1810.04805","DOI":"10.48550\/arXiv.1810.04805"},{"issue":"1","key":"3_CR11","doi-asserted-by":"publisher","first-page":"549","DOI":"10.3390\/biomedinformatics4010031","volume":"4","author":"MG Di Cesare","year":"2024","unstructured":"Di Cesare, M.G., Perpetuini, D., Cardone, D., Merla, A.: Assessment of voice disorders using machine learning and vocal analysis of voice samples recorded through smartphones. BioMedInformatics 4(1), 549\u2013565 (2024). https:\/\/doi.org\/10.3390\/biomedinformatics4010031","journal-title":"BioMedInformatics"},{"key":"3_CR12","doi-asserted-by":"publisher","unstructured":"Favaro, A., et al.: Interpretable speech features vs. DNN embeddings: what to use in the automatic assessment of Parkinson\u2019s disease in multi-lingual scenarios. Comput. Biol. Med. 166, 107559 (2023). https:\/\/doi.org\/10.1016\/j.compbiomed.2023.107559","DOI":"10.1016\/j.compbiomed.2023.107559"},{"key":"3_CR13","doi-asserted-by":"publisher","unstructured":"Harar, P., Alonso-Hernandezy, J.B., Mekyska, J., Galaz, Z., Burget, R., Smekal, Z.: Voice pathology detection using deep learning: a preliminary study. In: 2017 International Conference and Workshop on Bioinspired Intelligence (IWOBI), pp. 10\u201312. IEEE (2017). https:\/\/doi.org\/10.1109\/IWOBI.2017.7985525","DOI":"10.1109\/IWOBI.2017.7985525"},{"key":"3_CR14","doi-asserted-by":"publisher","unstructured":"Hoffman, J.I.E.: Chapter 33 - Logistic Regression. In: Basic Biostatistics for Medical and Biomedical Practitioners (Second Edition), pp. 581\u2013589. Academic Press, Cambridge, MA, USA (2019). https:\/\/doi.org\/10.1016\/B978-0-12-817084-7.00033-4","DOI":"10.1016\/B978-0-12-817084-7.00033-4"},{"key":"3_CR15","doi-asserted-by":"publisher","unstructured":"Hsu, W.N., Bolte, B., Tsai, Y.H.H., Lakhotia, K., Salakhutdinov, R., Mohamed, A.: HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units. arXiv (2021). https:\/\/doi.org\/10.48550\/arXiv.2106.07447","DOI":"10.48550\/arXiv.2106.07447"},{"key":"3_CR16","doi-asserted-by":"publisher","unstructured":"Jenei, A.Z., Kiss, G., Sztah\u00f3, D.: Detection of speech related disorders by pre-trained embedding models extracted biomarkers. In: Prasanna, S.R.M., Karpov, A., Samudravijaya, K., Agrawal, S.S. (eds.) Speech and Computer. SPECOM 2022. LNCS(), vol. 13721. Springer, Cham (2022). https:\/\/doi.org\/10.1007\/978-3-031-20980-2_24","DOI":"10.1007\/978-3-031-20980-2_24"},{"key":"3_CR17","doi-asserted-by":"publisher","unstructured":"Kotarba, K., Kotarba, M.: Voice pathology assessment using X-vectors approach. Vibr. Phys. Syst. 32(1), 2021108 (2021). https:\/\/doi.org\/10.21008\/j.0860-6897.2021.1.08","DOI":"10.21008\/j.0860-6897.2021.1.08"},{"key":"3_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvoice.2022.11.001","author":"DRA Leite","year":"2022","unstructured":"Leite, D.R.A., de Moraes, R.M., Lopes, L.W.: Different performances of machine learning models to classify dysphonic and non-dysphonic voices. J. Voice (2022). https:\/\/doi.org\/10.1016\/j.jvoice.2022.11.001","journal-title":"J. Voice"},{"issue":"6","key":"3_CR19","doi-asserted-by":"publisher","first-page":"3155","DOI":"10.1007\/s11760-023-02537-8","volume":"17","author":"SS Nayak","year":"2023","unstructured":"Nayak, S.S., Darji, A.D., Shah, P.K.: Machine learning approach for detecting COVID-19 from speech signal using Mel frequency magnitude coefficient. SIViP 17(6), 3155\u20133162 (2023). https:\/\/doi.org\/10.1007\/s11760-023-02537-8","journal-title":"SIViP"},{"key":"3_CR20","doi-asserted-by":"publisher","unstructured":"Noble, W.S.: What is a support vector machine? Nat. Biotechnol. 24, 1565\u20131567 (2006). https:\/\/doi.org\/10.1038\/nbt1206-1565","DOI":"10.1038\/nbt1206-1565"},{"key":"3_CR21","doi-asserted-by":"publisher","unstructured":"Pappagari, R., et al.: Automatic Detection and Assessment of Alzheimer Disease Using Speech and Language Technologies in Low-Resource Scenarios (2021). https:\/\/doi.org\/10.21437\/Interspeech.2021-1850. Accessed 25 Mar 2024","DOI":"10.21437\/Interspeech.2021-1850"},{"key":"3_CR22","doi-asserted-by":"publisher","unstructured":"Pedregosa, F., et\u00a0al.: Scikit-learn: machine learning in python. J. Mach. Learn. Res. 12, 2825\u20132830 (2011) https:\/\/doi.org\/10.48550\/arXiv.1201.0490","DOI":"10.48550\/arXiv.1201.0490"},{"key":"3_CR23","unstructured":"Ravanelli, M., et al.: SpeechBrain: A general-purpose speech toolkit (2021). arXiv:2106.04624"},{"key":"3_CR24","doi-asserted-by":"publisher","first-page":"14915","DOI":"10.1109\/ACCESS.2023.3243986","volume":"11","author":"D Ribas","year":"2023","unstructured":"Ribas, D., Pastor, M.A., Miguel, A., Mart\u00ednez, D., Ortega, A., Lleida, E.: Automatic voice disorder detection using self-supervised representations. IEEE Access 11, 14915\u201314927 (2023). https:\/\/doi.org\/10.1109\/ACCESS.2023.3243986","journal-title":"IEEE Access"},{"key":"3_CR25","doi-asserted-by":"publisher","unstructured":"Rivera, M.A.B., Garc\u00eda, C.A.R., Rojas, T.C.T., Flores, P.M.Q., Loaiza, R.E.P.: Automatic identification of Dysphonias using machine learning algorithms. Appl. Comput. Sci. 19(4), 14\u201325 (2023). https:\/\/doi.org\/10.35784\/acs-2023-32","DOI":"10.35784\/acs-2023-32"},{"key":"3_CR26","unstructured":"Salih, A.D.A.H., D\u00e1vid, S.: Cross-lingual dysphonic speech detection using pretrained speaker embeddings (2023). https:\/\/acta.bibl.u-szeged.hu\/78412. Accessed 26 Mar 2024"},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Sztah\u00f3, D., Tulics, M.G., Qi, J., Vicsi, K., et\u00a0al.: Cross-lingual detection of dysphonic speech for Dutch and Hungarian datasets. In: Proceedings of the 15th International Joint Conference on Biomedical Engineering Systems and Technologies (BIOSTEC 2022)-Volume 4: BIOSIGNALS, pp. 215\u2013220. SCITEPRESS\u2013Science and Technology Publications (2022)","DOI":"10.5220\/0010890200003123"},{"key":"3_CR28","doi-asserted-by":"publisher","unstructured":"Tuncer, T., et al.: FLP: Factor lattice pattern-based automated detection of Parkinson\u2019s disease and specific language impairment using recorded speech. Comput. Biol. Med. 173, 108280 (2024). https:\/\/doi.org\/10.1016\/j.compbiomed.2024.108280","DOI":"10.1016\/j.compbiomed.2024.108280"},{"issue":"7","key":"3_CR29","doi-asserted-by":"publisher","first-page":"456","DOI":"10.1093\/occmed\/kqg113","volume":"53","author":"NR Williams","year":"2024","unstructured":"Williams, N.R.: Occupational groups at risk of voice disorders: a review of the literature. Occup. Med. 53(7), 456\u2013460 (2024). https:\/\/doi.org\/10.1093\/occmed\/kqg113","journal-title":"Occup. Med."},{"key":"3_CR30","doi-asserted-by":"publisher","DOI":"10.1016\/j.jvoice.2023.06.006","author":"B van der Woerd","year":"2023","unstructured":"van der Woerd, B., et al.: A machine-learning algorithm for the automated perceptual evaluation of dysphonia severity. J. Voice (2023). https:\/\/doi.org\/10.1016\/j.jvoice.2023.06.006","journal-title":"J. Voice"},{"issue":"2","key":"3_CR31","doi-asserted-by":"publisher","first-page":"328","DOI":"10.3390\/electronics12020328","volume":"12","author":"F Yin","year":"2023","unstructured":"Yin, F., Du, J., Xu, X., Zhao, L.: Depression detection in speech using transformer and parallel convolutional neural networks. Electronics 12(2), 328 (2023). https:\/\/doi.org\/10.3390\/electronics12020328","journal-title":"Electronics"}],"container-title":["Lecture Notes in Computer Science","Text, Speech, and Dialogue"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-70566-3_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,20]],"date-time":"2025-11-20T05:14:36Z","timestamp":1763615676000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-70566-3_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031705656","9783031705663"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-70566-3_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"27 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"TSD","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Text, Speech, and Dialogue","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Brno","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Czech Republic","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"tsd2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/www.tsdconference.org\/tsd2024\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}