{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T07:43:54Z","timestamp":1760255034399,"version":"build-2065373602"},"publisher-location":"Cham","reference-count":42,"publisher":"Springer Nature Switzerland","isbn-type":[{"value":"9783032079558","type":"print"},{"value":"9783032079565","type":"electronic"}],"license":[{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,10,13]],"date-time":"2025-10-13T00:00:00Z","timestamp":1760313600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-3-032-07956-5_3","type":"book-chapter","created":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T07:19:34Z","timestamp":1760253574000},"page":"39-51","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["WhiSQA: Non-intrusive Speech Quality Prediction Using Whisper Encoder Features"],"prefix":"10.1007","author":[{"given":"George","family":"Close","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kris","family":"Hong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Thomas","family":"Hain","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stefan","family":"Goetze","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2025,10,13]]},"reference":[{"issue":"6","key":"3_CR1","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/MSP.2011.942469","volume":"28","author":"S M\u00f6ller","year":"2011","unstructured":"M\u00f6ller, S., Chan, W.-Y., C\u00f4t\u00e9, N., Falk, T.H., Raake, A., W\u00e4ltermann, M.: Speech quality estimation: models and trends. IEEE Signal Process. Mag. 28(6), 18\u201328 (2011)","journal-title":"IEEE Signal Process. Mag."},{"key":"3_CR2","doi-asserted-by":"crossref","unstructured":"Rohdenburg, T., Goetze, S., Hohmann, V., Kammeyer, K.-D., Kollmeier, B.: Objective perceptual quality assessment for self-steering binaural hearing aid microphone arrays. In: Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2008)","DOI":"10.1109\/ICASSP.2008.4518143"},{"key":"3_CR3","doi-asserted-by":"publisher","DOI":"10.1201\/b14529","volume-title":"Speech Enhancement: Theory and Practice","author":"P Loizou","year":"2013","unstructured":"Loizou, P.: Speech Enhancement: Theory and Practice, 2nd edn. CRC Press, Boca Raton (2013)","edition":"2"},{"issue":"6","key":"3_CR4","doi-asserted-by":"publisher","first-page":"386","DOI":"10.17743\/jaes.2014.0025","volume":"62","author":"S Goetze","year":"2014","unstructured":"Goetze, S., Albertin, E., Rennies, J., Habets, E.A., Kammeyer, K.D.: Speech quality assessment for listening-room compensation. J. Audio Eng. Soc. 62(6), 386\u2013399 (2014)","journal-title":"J. Audio Eng. Soc."},{"key":"3_CR5","doi-asserted-by":"crossref","unstructured":"Mittag, G.,\u00a0Naderi, B.,\u00a0Chehadi, A.,\u00a0M\u00f6ller, S.: NISQA: a deep CNN-self-attention model for multidimensional speech quality prediction with crowdsourced datasets. In: Interspeech 2021 (2021)","DOI":"10.21437\/Interspeech.2021-299"},{"key":"3_CR6","doi-asserted-by":"crossref","unstructured":"Kumar, A., et al.: Torchaudio-squim: reference-less speech quality and intelligibility measures in torchaudio (2023)","DOI":"10.1109\/ICASSP49357.2023.10096680"},{"key":"3_CR7","unstructured":"International Telecommunication Union: Recommendation ITU-T P.800.2 Mean opinion score interpretation and reporting. ITU, ITU-T Recommendation (2016)"},{"key":"3_CR8","unstructured":"International Telecommunication Union: Recommendation ITU-R BS.1534-3 Method for the Subjective Assessment of Intermediate Quality Level of Audio Systems. ITU, ITU-R Recommendation (2015)"},{"key":"3_CR9","doi-asserted-by":"crossref","unstructured":"Goetze, S., et al.: A study on speech quality and speech intelligibility measures for quality assessment of single-channel dereverberation algorithms. In: International\u00a0Workshop on Acoustic Signal Enhancement (IWAENC) (2014)","DOI":"10.1109\/IWAENC.2014.6954293"},{"key":"3_CR10","doi-asserted-by":"crossref","unstructured":"Avila, A.,\u00a0Cauchi, B.,\u00a0Goetze, S.,\u00a0Doclo, S.,\u00a0Falk, T.: Performance comparison of intrusive and non-intrusive instrumental quality measures for enhanced speech. In: International\u00a0Workshop on Acoustic Signal Enhancement (IWAENC) (2016)","DOI":"10.1109\/IWAENC.2016.7602907"},{"key":"3_CR11","unstructured":"Rix, A.,\u00a0Beerends, J.,\u00a0Hollier, M.,\u00a0Hekstra, A.: Perceptual evaluation of speech quality (pesq)-a new method for speech quality assessment of telephone networks and codecs. In: 2001 IEEE ICASSP (2001)"},{"key":"3_CR12","doi-asserted-by":"crossref","unstructured":"Taal, C.H., Hendriks, R.C.,\u00a0Heusdens, R.,\u00a0Jensen, J.: A short-time objective intelligibility measure for time-frequency weighted noisy speech. In: ICASSP 2010 (2010)","DOI":"10.1109\/ICASSP.2010.5495701"},{"key":"3_CR13","doi-asserted-by":"crossref","unstructured":"Mart\u00edn-Do\u00f1as, J.,\u00a0Gomez, A.,\u00a0Gonzalez\u00a0Lopez, J.,\u00a0Peinado, A.: A deep learning loss function based on the perceptual evaluation of the speech quality IEEE Signal Process. Lett.\u00a0(2018)","DOI":"10.1109\/LSP.2018.2871419"},{"key":"3_CR14","first-page":"201","volume":"2021","author":"S-W Fu","year":"2021","unstructured":"Fu, S.-W., et al.: MetricGAN+: an improved version of MetricGAN for speech enhancement. Proc. Interspeech 2021, 201\u2013205 (2021)","journal-title":"Proc. Interspeech"},{"key":"3_CR15","doi-asserted-by":"crossref","unstructured":"Close, G., Hain, T., Goetze, S.: MetricGAN+\/-: increasing robustness of noise reduction on unseen data. In: EUSIPCO 2022, Belgrade, Serbia (2022)","DOI":"10.23919\/EUSIPCO55093.2022.9909682"},{"key":"3_CR16","first-page":"936","volume":"2022","author":"R Cao","year":"2022","unstructured":"Cao, R., Abdulatif, S., Yang, B.: CMGAN: conformer-based metric GAN for speech enhancement. Proc. Interspeech 2022, 936\u2013940 (2022)","journal-title":"Proc. Interspeech"},{"key":"3_CR17","doi-asserted-by":"crossref","unstructured":"Close, G.,\u00a0Ravenscroft, W.,\u00a0Hain, T.,\u00a0Goetze, S.: Multi-CMGAN+\/+: leveraging multi-objective speech quality metric prediction for speech enhancement. In: IEEE International\u00a0Conference\u00a0on Acoustics, Speech and Signal Processing (ICASSP\u201924) (2024)","DOI":"10.1109\/ICASSP48485.2024.10448343"},{"key":"3_CR18","doi-asserted-by":"crossref","unstructured":"Mai, Y.,\u00a0Goetze, S.: MetricGAN+KAN: Kolmogorov-Arnold networks in metric-driven speech enhancement systems. In: Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP\u201925) (2025)","DOI":"10.1109\/ICASSP49660.2025.10890542"},{"key":"3_CR19","first-page":"3854","volume":"2024","author":"D Oliveira","year":"2024","unstructured":"Oliveira, D., Welker, S., Richter, J., Gerkmann, T.: The pesqetarian: on the relevance of goodhart\u2019s law for speech enhancement. Interspeech 2024, 3854\u20133858 (2024)","journal-title":"Interspeech"},{"key":"3_CR20","doi-asserted-by":"crossref","unstructured":"Close, G., Hain, T., Goetze, S.: Identifying hallucination in perceptually motivated speech enhancement networks. In: 32nd European Signal Processing Conference (EUSIPCO24), Lyon, France (2024)","DOI":"10.23919\/EUSIPCO63174.2024.10714927"},{"issue":"7","key":"3_CR21","doi-asserted-by":"publisher","first-page":"1151","DOI":"10.1109\/TASLP.2019.2912123","volume":"27","author":"B Cauchi","year":"2019","unstructured":"Cauchi, B., Siedenburg, K., Santos, J.F., Falk, T.H., Doclo, S., Goetze, S.: Non-intrusive speech quality prediction using modulation energies and LSTM-network. IEEE\/ACM Trans. Audio Speech Lang. Process. 27(7), 1151\u20131163 (2019)","journal-title":"IEEE\/ACM Trans. Audio Speech Lang. Process."},{"key":"3_CR22","doi-asserted-by":"crossref","unstructured":"Reddy, C.K.A.,\u00a0Gopal, V.,\u00a0Cutler, R.: Dnsmos p.835: a non-intrusive perceptual objective speech quality metric to evaluate noise suppressors (2022)","DOI":"10.1109\/ICASSP43922.2022.9746108"},{"key":"3_CR23","first-page":"3308","volume":"2022","author":"G Yi","year":"2022","unstructured":"Yi, G., et al.: ConferencingSpeech 2022 challenge: non-intrusive objective speech quality assessment (NISQA) challenge for online conferencing applications. Proc. Interspeech 2022, 3308\u20133312 (2022)","journal-title":"Proc. Interspeech"},{"key":"3_CR24","doi-asserted-by":"crossref","unstructured":"Dong, X., Williamson, D.S.: A pyramid recurrent network for predicting crowdsourced speech-quality ratings of real-world signals. In: Interspeech 2020, pp. 4631\u20134635 (2020)","DOI":"10.21437\/Interspeech.2020-2809"},{"key":"3_CR25","doi-asserted-by":"crossref","unstructured":"X\u00a0Warzybok, D., et al.: Subjective speech quality and speech intelligibility evaluation of single-channel dereverberation algorithms. In: Proceedings of\u00a0International\u00a0Workshop on Acoustic Signal Enhancement (IWAENC 2014) (2014)","DOI":"10.1109\/IWAENC.2014.6954313"},{"key":"3_CR26","doi-asserted-by":"publisher","DOI":"10.1016\/j.heares.2022.108606","volume":"426","author":"M Karbasi","year":"2022","unstructured":"Karbasi, M., Kolossa, D.: ASR-based speech intelligibility prediction: a review. Hear. Res. 426, 108606 (2022)","journal-title":"Hear. Res."},{"key":"3_CR27","doi-asserted-by":"crossref","unstructured":"Barker, J., et al.: The 2nd clarity prediction challenge: a machine learning challenge for hearing aid intelligibility prediction. In: ICASSP (2024)","DOI":"10.1109\/ICASSP48485.2024.10446441"},{"key":"3_CR28","doi-asserted-by":"crossref","unstructured":"Close, G., Hain, T., Goetze, S.: Non intrusive intelligibility predictor for hearing impaired individuals using self supervised speech representations. In: Proceedings of ASRU Satellite Workshop, Workshop on Speech Foundation Models and their Performance Benchmarks (SPARKS) (2023)","DOI":"10.21437\/Interspeech.2022-10182"},{"key":"3_CR29","unstructured":"Cuervo, S., Marxer, R.: Temporal-hierarchical features from noise-robust speech foundation models for non-intrusive intelligibility prediction. In: Clarity Workshop 2022 (2022)"},{"key":"3_CR30","doi-asserted-by":"crossref","unstructured":"Mogridge, R., et al.: Non-intrusive speech intelligibility prediction for hearing-impaired users using intermediate asr features and human memory models. In: IEEE International\u00a0Conference\u00a0on Acoustics, Speech and Signal Processing (ICASSP\u201924) (2024)","DOI":"10.1109\/ICASSP48485.2024.10447597"},{"key":"3_CR31","doi-asserted-by":"crossref","unstructured":"Pasad, A., Shi, B., Livescu, K.: Comparative layer-wise analysis of self-supervised speech models. In: IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (2023)","DOI":"10.1109\/ICASSP49357.2023.10096149"},{"key":"3_CR32","doi-asserted-by":"crossref","unstructured":"Close, G.,\u00a0Ravenscroft, W.,\u00a0Hain, T.,\u00a0Goetze, S.: Perceive and predict: self-supervised speech representation based loss functions for speech enhancement. In: Proceedings of ICASSP 2023 (2023)","DOI":"10.1109\/ICASSP49357.2023.10095666"},{"key":"3_CR33","unstructured":"Radford, A., Kim, J.W.,\u00a0Xu, T.,\u00a0Brockman, G.,\u00a0McLeavey, C.,\u00a0Sutskever, I.: Robust speech recognition via large-scale weak supervision (2022)"},{"key":"3_CR34","doi-asserted-by":"crossref","unstructured":"Fu, S.-W.,\u00a0Yu, C., Hung, K.-H.,\u00a0Ravanelli, M.,\u00a0Tsao, Y.: Metricgan-u: unsupervised speech enhancement\/dereverberation based only on noisy\/reverberated speech (2021)","DOI":"10.1109\/ICASSP43922.2022.9747180"},{"key":"3_CR35","doi-asserted-by":"crossref","unstructured":"Tamm, B.,\u00a0Vandenberghe, R.,\u00a0Van\u00a0hamme, H.: Analysis of xls-r for speech quality assessment. In: Proceedings of WASPAA 2023, pp. 1\u20135 (2023)","DOI":"10.1109\/WASPAA58266.2023.10248049"},{"key":"3_CR36","doi-asserted-by":"crossref","unstructured":"W\u00e4ltermann, M.: Dimension-based quality modeling of transmitted speech (2013). https:\/\/api.semanticscholar.org\/CorpusID:63687570","DOI":"10.1007\/978-3-642-35019-1"},{"key":"3_CR37","doi-asserted-by":"crossref","unstructured":"Hashmi, A.: Perceptual evaluation of speech quality for inexpensive recording equipment. Acoustics 3(1), 200\u2013211 (2021). https:\/\/www.mdpi.com\/2624-599X\/3\/1\/14","DOI":"10.3390\/acoustics3010014"},{"key":"3_CR38","doi-asserted-by":"crossref","unstructured":"Richey, C., et al.: Voices obscured in complex environmental settings (voices) corpus (2018)","DOI":"10.21437\/Interspeech.2018-1454"},{"key":"3_CR39","doi-asserted-by":"crossref","unstructured":"Mittag, G., et al.: DNN no-reference PSTN speech quality prediction. In: Proceedings of Interspeech 2020 (2020)","DOI":"10.21437\/Interspeech.2020-2760"},{"key":"3_CR40","unstructured":"Kingma, D.P.,\u00a0Ba, J.: Adam: a method for stochastic optimization CoRR (2014)"},{"key":"3_CR41","doi-asserted-by":"crossref","unstructured":"Shen, K.,\u00a0Yan, D.,\u00a0Dong, L.: MSQAT: a multi-dimension non-intrusive speech quality assessment transformer utilizing self-supervised representations. Appl. Acoust. 212, 109584 (2023). https:\/\/www.sciencedirect.com\/science\/article\/pii\/S0003682X23003821","DOI":"10.1016\/j.apacoust.2023.109584"},{"key":"3_CR42","doi-asserted-by":"crossref","unstructured":"Leglaive, S., et al.: The CHiME-7 UDASE task: unsupervised domain adaptation for conversational speech enhancement (2023)","DOI":"10.21437\/CHiME.2023-2"}],"container-title":["Lecture Notes in Computer Science","Speech and Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-032-07956-5_3","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,10,12]],"date-time":"2025-10-12T07:19:45Z","timestamp":1760253585000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-032-07956-5_3"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,13]]},"ISBN":["9783032079558","9783032079565"],"references-count":42,"URL":"https:\/\/doi.org\/10.1007\/978-3-032-07956-5_3","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,10,13]]},"assertion":[{"value":"13 October 2025","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"SPECOM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Speech and Computer","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Szeged","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Hungary","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"13 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"27","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"specom2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/specom.inf.u-szeged.hu\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}