{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T19:34:38Z","timestamp":1776886478927,"version":"3.51.2"},"reference-count":28,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1109\/apsipaasc63619.2025.10848735","type":"proceedings-article","created":{"date-parts":[[2025,1,27]],"date-time":"2025-01-27T18:37:05Z","timestamp":1738003025000},"page":"1-6","source":"Crossref","is-referenced-by-count":3,"title":["A Non-Intrusive Speech Quality Assessment Model using Whisper and Multi-Head Attention"],"prefix":"10.1109","author":[{"given":"Guojian","family":"Lin","sequence":"first","affiliation":[{"name":"Southern University of Science and Technology,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yu","family":"Tsao","sequence":"additional","affiliation":[{"name":"Research Center for Information Technology Innovation,Academia Sinica,Taiwan"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fei","family":"Chen","sequence":"additional","affiliation":[{"name":"Southern University of Science and Technology,Shenzhen,China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2001.941023"},{"issue":"6","key":"ref2","first-page":"366","article-title":"Perceptual objective listening quality assessment (POLQA), the third generation itu-t standard for end-to-end speech quality measurement part i\u2014temporal alignment","volume":"61","author":"Beerends","year":"2013","journal-title":"Journal of the Audio Engineering Society"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.858005"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.specom.2020.05.001"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1802"},{"key":"ref6","first-page":"482","article-title":"STOI-Net: A deep learning based non-intrusive speech intelligibility assessment model","volume-title":"2020 Asia-Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","author":"Zezario"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053366"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-2003"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413877"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9747222"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-970"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP43922.2022.9746395"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-439"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-11247"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10262"},{"key":"ref16","first-page":"28 492","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"International Conference on Machine Learning","author":"Radford"},{"key":"ref17","article-title":"Utilizing Whisper to enhance multi-branched speech intelligibility prediction model for hearing aids","author":"Zezario","year":"2023"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/JSTSP.2022.3188113"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2024.3378107"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3205757"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389763"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/SLT.2018.8639585"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-10153"},{"key":"ref24","first-page":"5178","article-title":"BEATs: Audio pre-Training with acoustic tokenizers","volume-title":"International Conference on Machine Learning","author":"Chen"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2021.3122291"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2022-143"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/ASRU57964.2023.10389788"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/icme57554.2024.10688047"}],"event":{"name":"2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","location":"Macau, Macao","start":{"date-parts":[[2024,12,3]]},"end":{"date-parts":[[2024,12,6]]}},"container-title":["2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10848542\/10848533\/10848735.pdf?arnumber=10848735","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,28]],"date-time":"2025-01-28T06:19:23Z","timestamp":1738045163000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10848735\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":28,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc63619.2025.10848735","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]}}}