{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T10:21:16Z","timestamp":1730283676211,"version":"3.28.0"},"reference-count":37,"publisher":"IEEE","license":[{"start":{"date-parts":[[2022,9,26]],"date-time":"2022-09-26T00:00:00Z","timestamp":1664150400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2022,9,26]],"date-time":"2022-09-26T00:00:00Z","timestamp":1664150400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2022,9,26]]},"DOI":"10.1109\/mmsp55362.2022.9949329","type":"proceedings-article","created":{"date-parts":[[2022,11,22]],"date-time":"2022-11-22T21:39:16Z","timestamp":1669153156000},"page":"1-6","source":"Crossref","is-referenced-by-count":0,"title":["As We Speak: Real-Time Visually Guided Speaker Separation and Localization"],"prefix":"10.1109","author":[{"given":"Piotr","family":"Czarnecki","sequence":"first","affiliation":[{"name":"Samsung R&#x0026;D Institute,Poland"}]},{"given":"Jakub","family":"Tkaczuk","sequence":"additional","affiliation":[{"name":"Samsung R&#x0026;D Institute,Poland"}]}],"member":"263","reference":[{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00525"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2021-1609"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_35"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00182"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01248"},{"key":"ref36","first-page":"51","author":"virtanen","year":"2012","journal-title":"Voice Activity Detection Noise Estimation and Adaptive Filters for Acoustic Signal Enhancement 1st"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"ref34","first-page":"3443","article-title":"Fast pixel-based video scene change detection","volume":"4","author":"yi","year":"2005","journal-title":"2005 IEEE International Symposium on Circuits and Systems"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2444659"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2017.2726762"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462116"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462417"},{"journal-title":"Coarse-to-fine optimization for speech en-hancement","year":"2019","author":"yao","key":"ref14"},{"key":"ref15","article-title":"Phase-aware speech enhancement with deep complex u-net","author":"choi","year":"2019","journal-title":"International Conference on Learning Representations"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462581"},{"key":"ref17","doi-asserted-by":"crossref","first-page":"1849","DOI":"10.1109\/TASLP.2014.2352935","article-title":"On training targets for super-vised speech separation","volume":"22","author":"wang","year":"2014","journal-title":"IEEE\/ACM Transactions on Audio Speech and Language Processing"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682790"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/TASLP.2015.2512042"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00223"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1405"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_39"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58523-5_13"},{"key":"ref6","article-title":"Audio vision: Using audio-visual syn-chrony to locate sounds","volume":"12","author":"hershey","year":"2000","journal-title":"Advances in neural information processing systems"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00398"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2017-1465"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/JPROC.2003.817150"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/TETCI.2017.2784878"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1400"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.eswa.2021.115833"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201357"},{"key":"ref20","article-title":"Wavenet: A generative model for raw audio","author":"oord","year":"2016","journal-title":"ArXiv"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8683634"},{"key":"ref21","article-title":"An empirical evaluation of generic convolutional and recurrent networks for sequence modeling","volume":"abs 1803 1271","author":"bai","year":"2018","journal-title":"CoRR"},{"key":"ref24","first-page":"-609i","article-title":"Voice activity detection using visual information","volume":"1","author":"liu","year":"2004","journal-title":"2004 IEEE International Conference on Acoustics Speech and Signal Processing"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2019.8682834"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2019-3114"},{"key":"ref25","article-title":"Ava-activespeaker: An audio-visual dataset for active speaker detection","author":"roth","year":"2020","journal-title":"ICA SSP"}],"event":{"name":"2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP)","start":{"date-parts":[[2022,9,26]]},"location":"Shanghai, China","end":{"date-parts":[[2022,9,28]]}},"container-title":["2022 IEEE 24th International Workshop on Multimedia Signal Processing (MMSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9948698\/9948704\/09949329.pdf?arnumber=9949329","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,12,12]],"date-time":"2022-12-12T19:54:30Z","timestamp":1670874870000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9949329\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,9,26]]},"references-count":37,"URL":"https:\/\/doi.org\/10.1109\/mmsp55362.2022.9949329","relation":{},"subject":[],"published":{"date-parts":[[2022,9,26]]}}}