{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,8,27]],"date-time":"2025-08-27T00:19:33Z","timestamp":1756253973165,"version":"3.44.0"},"reference-count":27,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,10,1]],"date-time":"2021-10-01T00:00:00Z","timestamp":1633046400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100007224","name":"National Foundation for Science and Technology Development","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100007224","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,10]]},"DOI":"10.1109\/mapr53640.2021.9585244","type":"proceedings-article","created":{"date-parts":[[2021,10,29]],"date-time":"2021-10-29T15:28:04Z","timestamp":1635521284000},"page":"1-6","source":"Crossref","is-referenced-by-count":3,"title":["Visual-guided audio source separation: an empirical study"],"prefix":"10.1109","author":[{"given":"Thanh Thi-Hien","family":"Duong","sequence":"first","affiliation":[{"name":"Hanoi University of Mining and Geology,Dept. Information Technology,Hanoi,Vietnam"}]},{"given":"Manh Nguyen","family":"Huu","sequence":"additional","affiliation":[{"name":"Hanoi University of Science and Technology,School of Information Technology,Hanoi,Vietnam"}]},{"given":"Hai Nghiem","family":"Thi","sequence":"additional","affiliation":[{"name":"Hanoi University of Science and Technology,School of Electrical Engineering,Hanoi,Vietnam"}]},{"given":"Thi-Lan","family":"Le","sequence":"additional","affiliation":[{"name":"Hanoi University of Science and Technology,School of Electronics and Telecommunications,Hanoi,Vietnam"}]},{"given":"Phi-Le","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Hanoi University of Science and Technology,School of Information Technology,Hanoi,Vietnam"}]},{"given":"Quoc-Cuong","family":"Nguyen","sequence":"additional","affiliation":[{"name":"Hanoi University of Science and Technology,School of Electrical Engineering,Hanoi,Vietnam"}]}],"member":"263","reference":[{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_3"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00398"},{"key":"ref12","article-title":"Visualvoice: Audio-visual speech separation with cross-modal consistency","author":"gao","year":"2021","journal-title":"CVPR"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/TASSP.1984.1164317"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref15","first-page":"1","article-title":"Multichannel u-net for music source separation","author":"kadandale","year":"2020","journal-title":"IEEE Int Multimedia Signal Process Workshop"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2019.2915167"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7951787"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952687"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TCYB.2018.2883607"},{"key":"ref4","article-title":"Yolov4: Optimal speed and accuracy of object detection","author":"bochkovskiy","year":"2020","journal-title":"CoRR"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_35"},{"key":"ref3","article-title":"Soundnet: Learning sound representations from unlabeled video","author":"aytar","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1145\/2833258.2833276"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2020-1065"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201357"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2018.2869692"},{"key":"ref2","first-page":"208","article-title":"Self-supervised learning of audio-visual objects from video","author":"afouras","year":"2020","journal-title":"Computer Vision &#x2013; ECCV 2020"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01049"},{"journal-title":"My lips are concealed Audio-visual speech enhancement through obstructions","year":"2019","author":"afouras","key":"ref1"},{"key":"ref20","article-title":"Yolov3: An incremental improvement","author":"redmon","year":"2018","journal-title":"CoRR"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TSP.2017.2679692"},{"key":"ref21","doi-asserted-by":"crossref","first-page":"234","DOI":"10.1007\/978-3-319-24574-4_28","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"ronneberger","year":"2015","journal-title":"Medical Image Computing and Computer-Assisted Intervention &#x2013; MICCAI 2015"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/TSA.2005.858005"},{"article-title":"Audio\/visual independent components","year":"2003","author":"smaragdis","key":"ref23"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00182"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00097"}],"event":{"name":"2021 International Conference on Multimedia Analysis and Pattern Recognition (MAPR)","start":{"date-parts":[[2021,10,15]]},"location":"Hanoi, Vietnam","end":{"date-parts":[[2021,10,16]]}},"container-title":["2021 International Conference on Multimedia Analysis and Pattern Recognition (MAPR)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9585222\/9585196\/09585244.pdf?arnumber=9585244","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,26]],"date-time":"2025-08-26T19:12:42Z","timestamp":1756235562000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9585244\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10]]},"references-count":27,"URL":"https:\/\/doi.org\/10.1109\/mapr53640.2021.9585244","relation":{},"subject":[],"published":{"date-parts":[[2021,10]]}}}