{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2024,10,30]],"date-time":"2024-10-30T10:20:26Z","timestamp":1730283626085,"version":"3.28.0"},"reference-count":31,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,10,6]],"date-time":"2021-10-06T00:00:00Z","timestamp":1633478400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,10,6]],"date-time":"2021-10-06T00:00:00Z","timestamp":1633478400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,10,6]],"date-time":"2021-10-06T00:00:00Z","timestamp":1633478400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,10,6]]},"DOI":"10.1109\/mmsp53017.2021.9733678","type":"proceedings-article","created":{"date-parts":[[2022,3,16]],"date-time":"2022-03-16T20:00:32Z","timestamp":1647460832000},"page":"1-6","source":"Crossref","is-referenced-by-count":7,"title":["Visually Supervised Speaker Detection and Localization via Microphone Array"],"prefix":"10.1109","author":[{"given":"Davide","family":"Berghi","sequence":"first","affiliation":[{"name":"University of Surrey,CVSSP,UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Adrian","family":"Hilton","sequence":"additional","affiliation":[{"name":"University of Surrey,CVSSP,UK"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Philip J.B.","family":"Jackson","sequence":"additional","affiliation":[{"name":"University of Surrey,CVSSP,UK"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/VRW50115.2020.00184"},{"key":"ref30","article-title":"Perceptual thresholds of audio-visual spatial coherence for a variety of audio-visual objects","author":"stenzel","year":"2018","journal-title":"Conference on audio for virtual and augmented reality"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46448-0_48"},{"key":"ref11","first-page":"609","article-title":"Look, listen and learn","author":"arandjelovic","year":"2017","journal-title":"ICCV"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00715"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01144"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58548-8_37"},{"key":"ref15","article-title":"A dataset of reverberant spatial sound scenes with moving sources for sound event localization and detection","author":"politis","year":"2020","journal-title":"Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE)"},{"year":"2019","author":"lecun","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.2200\/S00196ED1V01Y200906AIM006"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICME.2000.871073"},{"key":"ref19","article-title":"Towards speaker detection using lips movements for human-machine multiparty dialogue","author":"haider","year":"2012","journal-title":"Proceedings of Fonetik"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1007\/s11263-014-0733-5"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00041"},{"key":"ref27","first-page":"84","article-title":"ImageNet classification with deep convolutional neural networks","volume":"25","author":"krizhevsky","year":"2012","journal-title":"Advances in neural information processing systems"},{"key":"ref3","article-title":"Self-supervised generation of spatial audio for 360&#x00B0; video","author":"morgado","year":"2018","journal-title":"NIPS"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00398"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1121\/1.1323720"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_3"},{"key":"ref8","article-title":"Distilling the knowledge in a neural network","author":"hinton","year":"2015","journal-title":"Deep Learning and Representation Learning Workshop NIPS"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_35"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1145\/3197517.3201357"},{"key":"ref9","article-title":"Soundnet: Learning sound representations from unlabeled video","author":"aytar","year":"2016","journal-title":"NIPS"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.21437\/Interspeech.2018-1400"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1145\/2818346.2820780"},{"article-title":"Naver at ActivityNet Challenge 2019 - Task B Active speaker detection (AVA)","year":"2019","author":"chung","key":"ref22"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP40776.2020.9053900"},{"key":"ref24","first-page":"2051","article-title":"Tracking sound sources for object-based spatial audio in 3D audiovisual production","author":"izhar","year":"2020","journal-title":"Forum Acusticum"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01248"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.17743\/jaes.2020.0025"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2016.09.072"}],"event":{"name":"2021 IEEE 23rd International Workshop on Multimedia Signal Processing (MMSP)","start":{"date-parts":[[2021,10,6]]},"location":"Tampere, Finland","end":{"date-parts":[[2021,10,8]]}},"container-title":["2021 IEEE 23rd International Workshop on Multimedia Signal Processing (MMSP)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9733399\/9733432\/09733678.pdf?arnumber=9733678","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,7,9]],"date-time":"2022-07-09T02:21:35Z","timestamp":1657333295000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9733678\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,10,6]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/mmsp53017.2021.9733678","relation":{},"subject":[],"published":{"date-parts":[[2021,10,6]]}}}