{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,3]],"date-time":"2026-04-03T15:28:14Z","timestamp":1775230094294,"version":"3.50.1"},"reference-count":38,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T00:00:00Z","timestamp":1721001600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,7,15]]},"DOI":"10.1109\/icme57554.2024.10688047","type":"proceedings-article","created":{"date-parts":[[2024,9,30]],"date-time":"2024-09-30T17:24:16Z","timestamp":1727717056000},"page":"1-6","source":"Crossref","is-referenced-by-count":14,"title":["A Study On Incorporating Whisper For Robust Speech Assessment"],"prefix":"10.1109","author":[{"given":"Ryandhimas E.","family":"Zezario","sequence":"first","affiliation":[{"name":"National Taiwan University"}]},{"given":"Yu-Wen","family":"Chen","sequence":"additional","affiliation":[{"name":"Columbia University"}]},{"given":"Szu-Wei","family":"Fu","sequence":"additional","affiliation":[{"name":"NVIDIA"}]},{"given":"Yu","family":"Tsao","sequence":"additional","affiliation":[{"name":"Academia Sinica"}]},{"given":"Hsin-Min","family":"Wang","sequence":"additional","affiliation":[{"name":"Academia Sinica"}]},{"given":"Chiou-Shann","family":"Fuh","sequence":"additional","affiliation":[{"name":"National Taiwan University"}]}],"member":"263","reference":[{"key":"ref1","first-page":"862","article-title":"Perceptual Evaluation of Speech Quality (PESQ), an Objective Method for End-to-end Speech Quality Assessment of Narrow-band Telephone Networks and Speech Codecs","author":"Rix","year":"2001","journal-title":"ITU-T Recommendation"},{"issue":"6","key":"ref2","first-page":"366","article-title":"Perceptual Objective Listening Quality Assessment (POLQA), The Third Generation ITU-T Standard for End-to-end Speech Quality Measurement Part I\u2014temporal Alignment","volume":"61","author":"Beerends","year":"2013","journal-title":"Journal of The Audio Engineering Society"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1121\/1.384464"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1121\/1.1804628"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2011.2114881"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2016.2585878"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1121\/10.0005899"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2401513"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2011.09.004"},{"key":"ref10","first-page":"391","article-title":"MB-Net: MOS Prediction for Synthesized Speech with Mean-Bias Network","volume-title":"Proc. ICASSP","author":"Leng"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-2013"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747222"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746395"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3205757"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1802"},{"key":"ref16","first-page":"482","article-title":"STOI-Net: A Deep Learning-Based Non-Intrusive Speech Intelligibility Assessment Model","volume-title":"Proc. APSIPA ASC","author":"Zezario"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053366"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1250\/ast.e24.12"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10262"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2022-439"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10828"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10838"},{"key":"ref23","first-page":"28492","article-title":"Robust Speech Recognition via Large-Scale Weak Supervision","volume-title":"Proc. ICML","author":"Radford"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639585"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10153"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2023-2193"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389763"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/89.902276"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1985.1164550"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/APSIPA.2017.8281993"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2013-130"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053591"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-517"},{"key":"ref34","article-title":"Music Source Separation in the Waveform Domain","author":"D\u00e9fossez","year":"2021"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.2307\/1412159"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/TASL.2007.911054"},{"key":"ref37","first-page":"1","article-title":"LESSL-MOS: Self-Supervised Learning MOS Prediction with Listener Enhancement","volume-title":"Proc. ASRU","author":"Qi"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389634"}],"event":{"name":"2024 IEEE International Conference on Multimedia and Expo (ICME)","location":"Niagara Falls, ON, Canada","start":{"date-parts":[[2024,7,15]]},"end":{"date-parts":[[2024,7,19]]}},"container-title":["2024 IEEE International Conference on Multimedia and Expo (ICME)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10685847\/10687354\/10688047.pdf?arnumber=10688047","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,1]],"date-time":"2024-10-01T06:22:47Z","timestamp":1727763767000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10688047\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,7,15]]},"references-count":38,"URL":"https:\/\/doi.org\/10.1109\/icme57554.2024.10688047","relation":{},"subject":[],"published":{"date-parts":[[2024,7,15]]}}}