{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,5]],"date-time":"2026-02-05T09:43:24Z","timestamp":1770284604026,"version":"3.49.0"},"reference-count":45,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,3,1]],"date-time":"2020-03-01T00:00:00Z","timestamp":1583020800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,3,1]],"date-time":"2020-03-01T00:00:00Z","timestamp":1583020800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,3,1]],"date-time":"2020-03-01T00:00:00Z","timestamp":1583020800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,3]]},"DOI":"10.1109\/wacv45572.2020.9093438","type":"proceedings-article","created":{"date-parts":[[2020,5,15]],"date-time":"2020-05-15T03:41:09Z","timestamp":1589514069000},"page":"3240-3249","source":"Crossref","is-referenced-by-count":44,"title":["Coordinated Joint Multimodal Embeddings for Generalized Audio-Visual Zero-shot Classification and Retrieval of Videos"],"prefix":"10.1109","author":[{"given":"Kranti Kumar","family":"Parida","sequence":"first","affiliation":[]},{"given":"Neeraj","family":"Matiyali","sequence":"additional","affiliation":[]},{"given":"Tanaya","family":"Guha","sequence":"additional","affiliation":[]},{"given":"Gaurav","family":"Sharma","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","year":"2017","journal-title":"Large-scale weakly supervised sound event detection for smart cars"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2005.274"},{"key":"ref33","article-title":"Objects that sound","author":"arandjelovic\u00b4","year":"2017"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01219-9_3"},{"key":"ref31","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-01246-5_35","article-title":"The sound of pixels","author":"zhao","year":"2018"},{"key":"ref30","doi-asserted-by":"crossref","DOI":"10.1007\/978-3-030-01231-1_39","article-title":"Audio-visual scene analysis with self-supervised multisensory features","author":"owens","year":"2018"},{"key":"ref37","doi-asserted-by":"crossref","first-page":"112","DOI":"10.1145\/3197517.3201357","article-title":"Looking to listen at the cocktail party: a speaker-independent audio-visual model for speech separation","volume":"37","author":"ephrat","year":"2018","journal-title":"ACM Transactions on Graphics"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00879"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00458"},{"key":"ref34","article-title":"Identify, locate and separate: Audio-visual object extraction in large video collections using weak supervision","author":"parekh","year":"2018"},{"key":"ref10","article-title":"Zero-shot learning-a comprehensive evaluation of the good, the bad and the ugly","author":"xian","year":"2018","journal-title":"Transactions on Pattern Analysis and Machine Intelligence"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00047"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.117"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_22"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2017.7952261"},{"key":"ref15","first-page":"935","article-title":"Zero-shot learning through cross-modal transfer","author":"socher","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.15"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.473"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN.2019.8852315"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_24"},{"key":"ref28","first-page":"801","article-title":"Ambient sound provides supervision for visual learning","author":"owens","year":"2016","journal-title":"European on Computer Vision"},{"key":"ref4","first-page":"2121","article-title":"Devise: A deep visual-semantic embedding model","author":"frome","year":"2013","journal-title":"Advances in neural information processing systems"},{"key":"ref27","first-page":"892","article-title":"Sound-net: Learning sound representations from unlabeled video","author":"aytar","year":"2016","journal-title":"Advances in neural information processing systems"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46475-6_4"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2013.140"},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.73"},{"key":"ref5","first-page":"2152","article-title":"An embarrassingly simple approach to zero-shot learning","author":"romera-paredes","year":"2015","journal-title":"Int J of Machine Learning"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2015.2487986"},{"key":"ref7","article-title":"Zero-shot learning by convex combination of semantic embeddings","author":"norouzi","year":"2013"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00581"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00450"},{"key":"ref1","first-page":"2927","article-title":"Evaluation of output embeddings for finegrained image classification","author":"akata","year":"2015","journal-title":"Computer Vision and Pattern Recognition"},{"key":"ref20","article-title":"Zero-shot detection","author":"zhu","year":"2018"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.2307\/2334380"},{"key":"ref22","doi-asserted-by":"crossref","first-page":"792","DOI":"10.1007\/978-3-319-71246-8_48","article-title":"A simple exponential family framework for zero-shot learning","author":"verma","year":"2017","journal-title":"Machine Learning and Knowledge Discovery in Databases"},{"key":"ref21","article-title":"Zero-shot object detection: Learning to simultaneously recognize and localize novel concepts","author":"rahman","year":"2018"},{"key":"ref42","article-title":"Advances in pre-training distributed word representations","author":"mikolov","year":"2018","journal-title":"International Journal of Language Resources and Evaluation"},{"key":"ref24","first-page":"2672","article-title":"Generative adversarial nets","author":"goodfellow","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/ICASSP.2018.8462200"},{"key":"ref23","article-title":"Zero-shot learning via class-conditioned deep generative models","author":"wang","year":"2018","journal-title":"Thirty-Second AAAI Conference on Artificial Intelligence"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00523"},{"key":"ref26","author":"li","year":"2019","journal-title":"Zero-shot learning for speech recognition with universal phonetic model"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_19"},{"key":"ref25","article-title":"Generative adversarial text to image synthesis","author":"reed","year":"2016"}],"event":{"name":"2020 IEEE Winter Conference on Applications of Computer Vision (WACV)","location":"Snowmass Village, CO, USA","start":{"date-parts":[[2020,3,1]]},"end":{"date-parts":[[2020,3,5]]}},"container-title":["2020 IEEE Winter Conference on Applications of Computer Vision (WACV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9087828\/9093261\/09093438.pdf?arnumber=9093438","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,30]],"date-time":"2022-06-30T15:17:58Z","timestamp":1656602278000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9093438\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3]]},"references-count":45,"URL":"https:\/\/doi.org\/10.1109\/wacv45572.2020.9093438","relation":{},"subject":[],"published":{"date-parts":[[2020,3]]}}}