{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,21]],"date-time":"2026-04-21T04:10:33Z","timestamp":1776744633894,"version":"3.51.2"},"publisher-location":"Cham","reference-count":31,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783031672774","type":"print"},{"value":"9783031672781","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,1]],"date-time":"2024-01-01T00:00:00Z","timestamp":1704067200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024]]},"DOI":"10.1007\/978-3-031-67278-1_5","type":"book-chapter","created":{"date-parts":[[2024,8,13]],"date-time":"2024-08-13T06:02:45Z","timestamp":1723528965000},"page":"59-72","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Interpreting Pretrained Speech Models for Automatic Speech Assessment of Voice Disorders"],"prefix":"10.1007","author":[{"given":"Hok Shing","family":"Lau","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Mark","family":"Huntly","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Nathon","family":"Morgan","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adesua","family":"Iyenoma","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Biao","family":"Zeng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tim","family":"Bashford","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,8,14]]},"reference":[{"key":"5_CR1","doi-asserted-by":"crossref","unstructured":"Gong, Y., Chung, Y.-A., Glass, J.: AST: audio spectrogram transformer. In: Proceedings of the Interspeech 2021 (2021)","DOI":"10.21437\/Interspeech.2021-698"},{"key":"5_CR2","doi-asserted-by":"crossref","unstructured":"Chefer, H., Gur, S., Wolf, L.: Generic attention-model explainability for interpreting bi-modal and encoder-decoder transformers. In: 2021 IEEE\/CVF International Conference on Computer Vision (ICCV) (2021)","DOI":"10.1109\/ICCV48922.2021.00045"},{"key":"5_CR3","doi-asserted-by":"crossref","unstructured":"Alshammri, R., Alharbi, G., Alharbi, E., Almubark, I.: Machine learning approaches to identify Parkinson's disease using voice signal features. Front. Artif. Intell. 6 (2023)","DOI":"10.3389\/frai.2023.1084001"},{"key":"5_CR4","doi-asserted-by":"crossref","unstructured":"Wang, J., Zhang, L., Liu, T., Pan, W., Hu, B. Zhu, T.: Acoustic differences between healthy and depressed people: a cross-situation study. BMC Psychiatry 19 (2019)","DOI":"10.1186\/s12888-019-2300-7"},{"key":"5_CR5","doi-asserted-by":"crossref","unstructured":"Nathan, V., Vatanparvar, K., Rahman, M.M., Nemati, E., Kuang, J.: Assessment of chronic pulmonary disease patients using biomarkers from natural speech recorded by mobile devices. In: 2019 IEEE 16th International Conference on Wearable and Implantable Body Sensor Networks (BSN) (2019)","DOI":"10.1109\/BSN.2019.8771043"},{"key":"5_CR6","doi-asserted-by":"crossref","unstructured":"Han, J., et al.: Exploring automatic COVID-19 diagnosis via voice and symptoms from crowdsourced data. In: ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2021)","DOI":"10.1109\/ICASSP39728.2021.9414576"},{"key":"5_CR7","doi-asserted-by":"crossref","unstructured":"van der Woerd, B., Wu, M., Parsa, V., Doyle, P.C., Fung, K.: Evaluation of acoustic analyses of voice in nonoptimized conditions. J. Speech Lang. Hear. Res. 1\u20139 (2020)","DOI":"10.1044\/2020_JSLHR-20-00212"},{"key":"5_CR8","doi-asserted-by":"crossref","unstructured":"Penney, J., Gibson, A., Cox, F., Proctor, M., Szakay, A.: A comparison of acoustic correlates of voice quality across different recording devices: a cautionary tale. In: Proceedings of the Interspeech 2021 (2021)","DOI":"10.21437\/Interspeech.2021-729"},{"key":"5_CR9","doi-asserted-by":"crossref","unstructured":"Nallanthighal, V.S., Harma, A., Strik, H.: Detection of COPD exacerbation from speech: comparison of acoustic features and deep learning based speech breathing models. In: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2022)","DOI":"10.1109\/ICASSP43922.2022.9747785"},{"key":"5_CR10","doi-asserted-by":"publisher","first-page":"2293","DOI":"10.3390\/s23042293","volume":"23","author":"G Costantini","year":"2023","unstructured":"Costantini, G., et al.: Artificial intelligence-based voice assessment of patients with parkinson\u2019s disease off and on treatment: machine vs. deep-learning comparison. Sensors 23, 2293 (2023)","journal-title":"Sensors"},{"key":"5_CR11","unstructured":"Millet, J., et al.: Toward a realistic model of speech processing in the brain with self-supervised learning. In: Advances in Neural Information Processing Systems (2022)"},{"key":"5_CR12","doi-asserted-by":"crossref","unstructured":"Chen, X.-Y., Zhu, Q.-S., Zhang, J., Dai, L.-R.: Supervised and self-supervised pretraining based covid-19 detection using acoustic breathing\/cough\/speech signals. In: ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2022)","DOI":"10.1109\/ICASSP43922.2022.9746205"},{"key":"5_CR13","doi-asserted-by":"crossref","unstructured":"Wagner, D., et al.: Multi-class detection of pathological speech with latent features: how does it perform on unseen data? (2022)","DOI":"10.21437\/Interspeech.2023-464"},{"key":"5_CR14","doi-asserted-by":"crossref","unstructured":"Ribeiro, M.T., Singh, S., Guestrin, C.: Why should i trust you?: Explaining the predictions of any classifier. In: Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, New York (2016)","DOI":"10.1145\/2939672.2939778"},{"key":"5_CR15","unstructured":"Lundberg, S.M., Lee, S.-I.: A unified approach to interpreting model predictions. In: Proceedings of the 31st International Conference on Neural Information Processing Systems, Red Hook (2017)"},{"key":"5_CR16","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-CAM: visual explanations from deep networks via gradient-based localization. In: 2017 IEEE International Conference on Computer Vision (ICCV) (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"5_CR17","unstructured":"Sundararajan, M., Taly, A., Yan, Q.: Axiomatic attribution for deep networks. In: Proceedings of the 34th International Conference on Machine Learning - Volume 70, Sydney (2017)"},{"key":"5_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1371\/journal.pone.0130140","volume":"10","author":"S Bach","year":"2015","unstructured":"Bach, S., Binder, A., Montavon, G., Klauschen, F., M\u00fcller, K.-R., Samek, W.: On pixel-wise explanations for non-linear classifier decisions by layer-wise relevance propagation. PLoS ONE 10, 1\u201346 (2015)","journal-title":"PLoS ONE"},{"key":"5_CR19","doi-asserted-by":"crossref","unstructured":"Abnar, S., Zuidema, W.: Quantifying attention flow in transformers. In: Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, Online (2020)","DOI":"10.18653\/v1\/2020.acl-main.385"},{"key":"5_CR20","doi-asserted-by":"publisher","first-page":"418","DOI":"10.1016\/j.jfranklin.2023.11.038","volume":"361","author":"S Becker","year":"2024","unstructured":"Becker, S., Vielhaben, J., Ackermann, M., M\u00fcller, K.-R., Lapuschkin, S., Samek, W.: AudioMNIST: exploring explainable artificial intelligence for audio analysis on a simple benchmark. J. Franklin Inst. 361, 418\u2013428 (2024)","journal-title":"J. Franklin Inst."},{"key":"5_CR21","doi-asserted-by":"crossref","unstructured":"Frommholz, A., Seipel, F., Lapuschkin, S., Samek, W., Vielhaben, J.: XAI-based comparison of audio event classifiers with different input representations. In: 20th International Conference on Content-based Multimedia Indexing (2023)","DOI":"10.1145\/3617233.3617265"},{"key":"5_CR22","unstructured":"Woldert-Jokisz, B.: Saarbruecken Voice Database (2007)"},{"key":"5_CR23","doi-asserted-by":"crossref","unstructured":"Huckvale, M., Buciuleac, C.: Automated detection of voice disorder in the Saarbr\u00fccken voice database: effects of pathology subset and audio materials. In: Proceedings of the Interspeech 2021 (2021)","DOI":"10.21437\/Interspeech.2021-1507"},{"key":"5_CR24","unstructured":"Dosovitskiy, A., et al.: An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint arXiv:2010.11929 (2020)"},{"key":"5_CR25","doi-asserted-by":"crossref","unstructured":"Gemmeke, J.F., et al.: Audio set: an ontology and human-labeled dataset for audio events. In: 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2017)","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"5_CR26","unstructured":"Wolf, T., et al.: Transformers: state-of-the-art natural language processing. In: Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations (2020)"},{"key":"5_CR27","doi-asserted-by":"crossref","unstructured":"McAuliffe, M., Socolof, M., Mihuc, S., Wagner, M., Sonderegger, M.: Montreal forced aligner: trainable text-speech alignment using kaldi. In: Interspeech (2017)","DOI":"10.21437\/Interspeech.2017-1386"},{"key":"5_CR28","first-page":"2579","volume":"9","author":"L van der Maaten","year":"2008","unstructured":"van der Maaten, L., Hinton, G.: Visualizing data using t-SNE. J. Mach. Learn. Res. 9, 2579\u20132605 (2008)","journal-title":"J. Mach. Learn. Res."},{"key":"5_CR29","doi-asserted-by":"crossref","unstructured":"Xu, Q., Baevski, A., Auli, M.: Simple and effective zero-shot cross-lingual phoneme recognition (2021)","DOI":"10.21437\/Interspeech.2022-60"},{"key":"5_CR30","doi-asserted-by":"crossref","unstructured":"Liu, Z., Huckvale, M., McGlashan, J.: Automated voice pathology discrimination from continuous speech benefits from analysis by phonetic context. In: Proceedings of the Interspeech 2022 (2022)","DOI":"10.21437\/Interspeech.2022-10154"},{"key":"5_CR31","doi-asserted-by":"crossref","unstructured":"Zeng, B., et al.: Exploring the acoustic and prosodic features of a lung-function-sensitive repeated-word speech articulation test. Front. Psychol. 14 (2023)","DOI":"10.3389\/fpsyg.2023.1167902"}],"container-title":["Lecture Notes in Computer Science","Artificial Intelligence in Healthcare"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-031-67278-1_5","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,13]],"date-time":"2024-08-13T06:03:52Z","timestamp":1723529032000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-031-67278-1_5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024]]},"ISBN":["9783031672774","9783031672781"],"references-count":31,"URL":"https:\/\/doi.org\/10.1007\/978-3-031-67278-1_5","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024]]},"assertion":[{"value":"14 August 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors declare that there are no competing interests relevant to this work.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"AIiH","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on AI in Healthcare","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Swansea","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"United Kingdom","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"4 September 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"6 September 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"1","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"aiih2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/aiih.cc","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}