{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:21:18Z","timestamp":1764400878498,"version":"3.46.0"},"reference-count":18,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,10,22]],"date-time":"2025-10-22T00:00:00Z","timestamp":1761091200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,10,22]]},"DOI":"10.1109\/apsipaasc65261.2025.11249018","type":"proceedings-article","created":{"date-parts":[[2025,11,28]],"date-time":"2025-11-28T18:40:26Z","timestamp":1764355226000},"page":"2400-2405","source":"Crossref","is-referenced-by-count":0,"title":["Robust Audio-Visual Speech Recognition in Noisy Clinical Environments"],"prefix":"10.1109","author":[{"given":"Akshita","family":"Abrol","sequence":"first","affiliation":[{"name":"Singapore Institute of Technology,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ridwan","family":"Arefeen","sequence":"additional","affiliation":[{"name":"Singapore Institute of Technology,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Haotong","family":"Yu","sequence":"additional","affiliation":[{"name":"Singapore Institute of Technology,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Alexi","family":"George","sequence":"additional","affiliation":[{"name":"Singapore Institute of Technology,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kelvin Zhenghao","family":"Li","sequence":"additional","affiliation":[{"name":"Tan Tock Seng Hospital,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhengkui","family":"Wang","sequence":"additional","affiliation":[{"name":"Singapore Institute of Technology,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Rong","family":"Tong","sequence":"additional","affiliation":[{"name":"Singapore Institute of Technology,Singapore"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Speech enabled visual acuity test","volume-title":"in Proceedings of Interspeech 2024","author":"Yap","year":"2024"},{"doi-asserted-by":"publisher","key":"ref2","DOI":"10.1063\/1.5080868"},{"doi-asserted-by":"publisher","key":"ref3","DOI":"10.32604\/cmc.2022.020376"},{"doi-asserted-by":"publisher","key":"ref4","DOI":"10.3390\/s22207738"},{"doi-asserted-by":"publisher","key":"ref5","DOI":"10.1109\/TASLP.2020.3000593"},{"doi-asserted-by":"publisher","key":"ref6","DOI":"10.21437\/Interspeech.2022-11311"},{"doi-asserted-by":"publisher","key":"ref7","DOI":"10.1109\/ICASSP43922.2022.9747375"},{"key":"ref8","article-title":"Libri2vox dataset: Target speaker extraction with diverse speaker conditions and synthetic data","author":"Liu","year":"2024","journal-title":"arXiv preprint"},{"doi-asserted-by":"publisher","key":"ref9","DOI":"10.1109\/ICASSP39728.2021.9413901"},{"doi-asserted-by":"publisher","key":"ref10","DOI":"10.1109\/ICASSP48485.2024.10445985"},{"key":"ref11","first-page":"2849228518","article-title":"Robust speech recognition via large-scale weak supervision","volume-title":"International conference on machine learning","author":"Radford"},{"doi-asserted-by":"publisher","key":"ref12","DOI":"10.21437\/interspeech.2021-299"},{"doi-asserted-by":"publisher","key":"ref13","DOI":"10.21437\/Interspeech.2023-205"},{"key":"ref14","article-title":"Mediapipe: A framework for building perception pipelines","author":"Lugaresi","year":"2019","journal-title":"arXiv preprint"},{"key":"ref15","article-title":"Real-time speech recognition for noisy multi-speaker clinical environments","author":"Abrol","year":"2025","journal-title":"Accepted by IALP 2025"},{"key":"ref16","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume-title":"International Conference on Learning Representations (ICLR)","volume":"1","author":"Hu"},{"doi-asserted-by":"publisher","key":"ref17","DOI":"10.1109\/ICASSP49357.2023.10096889"},{"key":"ref18","first-page":"6219","article-title":"Visualonly recognition of normal, whispered and silent speech","volume-title":"2018 ICASSP","author":"Petridis","year":"2018"}],"event":{"name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","start":{"date-parts":[[2025,10,22]]},"location":"Singapore, Singapore","end":{"date-parts":[[2025,10,24]]}},"container-title":["2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11248853\/11248968\/11249018.pdf?arnumber=11249018","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,29]],"date-time":"2025-11-29T07:17:29Z","timestamp":1764400649000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11249018\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,22]]},"references-count":18,"URL":"https:\/\/doi.org\/10.1109\/apsipaasc65261.2025.11249018","relation":{},"subject":[],"published":{"date-parts":[[2025,10,22]]}}}