{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,14]],"date-time":"2026-07-14T14:58:04Z","timestamp":1784041084484,"version":"3.55.0"},"reference-count":22,"publisher":"IEEE","license":[{"start":{"date-parts":[[2024,10,17]],"date-time":"2024-10-17T00:00:00Z","timestamp":1729123200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,10,17]],"date-time":"2024-10-17T00:00:00Z","timestamp":1729123200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2024,10,17]]},"DOI":"10.1109\/o-cocosda64382.2024.10800033","type":"proceedings-article","created":{"date-parts":[[2024,12,20]],"date-time":"2024-12-20T18:56:08Z","timestamp":1734720968000},"page":"1-6","source":"Crossref","is-referenced-by-count":2,"title":["Robust Audio-Visual Speech Enhancement: Correcting Misassignments in Complex Environments With Advanced Post-Processing"],"prefix":"10.1109","author":[{"given":"Wenze","family":"Ren","sequence":"first","affiliation":[{"name":"National Taiwan University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Kuo-Hsuan","family":"Hung","sequence":"additional","affiliation":[{"name":"National Taiwan University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Rong","family":"Chao","sequence":"additional","affiliation":[{"name":"National Taiwan University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"YouJin","family":"Li","sequence":"additional","affiliation":[{"name":"National Taiwan University"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hsin-Min","family":"Wang","sequence":"additional","affiliation":[{"name":"Academia 
Sinica"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yu","family":"Tsao","sequence":"additional","affiliation":[{"name":"Academia Sinica"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","article-title":"Phase-aware speech enhancement with deep complex U-Net","volume-title":"Proc. ICLR","author":"Choi","year":"2019"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2537"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP39728.2021.9413901"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2013-130"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2842159"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.21437\/interspeech.2014-574"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/tetci.2017.2784878"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP48485.2024.10446372"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/SLT54892.2023.10022646"},{"key":"ref10","article-title":"VisualVoice: Audio-visual speech separation with cross-modal consistency","volume-title":"Proc. CVPR","author":"Gao","year":"2021"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1400"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2022.3153265"},{"key":"ref13","article-title":"Dual-path self-attention RNN for real-time speech enhancement","author":"Pandey","year":"2020","journal-title":"arXiv preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2017.7965918"},{"key":"ref15","doi-asserted-by":"crossref","DOI":"10.1109\/ICASSP49357.2023.10094306","article-title":"AV-Sepformer: Cross-attention sepformer for audio-visual target speaker extraction","volume-title":"Proc. 
ICASSP","author":"Lin","year":"2023"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2889052"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP49357.2023.10094992"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2022-157"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-2650"},{"key":"ref20","article-title":"FaceNet: A unified embedding for face recognition and clustering","volume-title":"Proc. CVPR","author":"Schroff","year":"2015"},{"key":"ref21","article-title":"mixup: Beyond empirical risk minimization","volume-title":"Proc. ICLR","author":"Zhang","year":"2018"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/icassp.2017.7952154"}],"event":{"name":"2024 27th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)","location":"Hsinchu City, Taiwan","start":{"date-parts":[[2024,10,17]]},"end":{"date-parts":[[2024,10,19]]}},"container-title":["2024 27th Conference of the Oriental COCOSDA International Committee for the Co-ordination and Standardisation of Speech Databases and Assessment Techniques (O-COCOSDA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/10799946\/10799972\/10800033.pdf?arnumber=10800033","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,1,15]],"date-time":"2025-01-15T19:28:35Z","timestamp":1736969315000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10800033\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,17]]},"references-count":22,"URL":"https:\/\/doi.org\/10.1109\/o-cocosda64382.2024.10800033","relation":{},"subject":[],"published":{"date-parts":[[2024,10,17]]}}}