{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T05:21:19Z","timestamp":1780636879076,"version":"3.54.1"},"reference-count":54,"publisher":"IEEE","license":[{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2021,9,27]],"date-time":"2021-09-27T00:00:00Z","timestamp":1632700800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2021,9,27]]},"DOI":"10.1109\/iros51168.2021.9635989","type":"proceedings-article","created":{"date-parts":[[2021,12,16]],"date-time":"2021-12-16T20:45:38Z","timestamp":1639687538000},"page":"3520-3527","source":"Crossref","is-referenced-by-count":75,"title":["ViNet: Pushing the limits of Visual Modality for Audio-Visual Saliency Prediction"],"prefix":"10.1109","author":[{"given":"Samyak","family":"Jain","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Pradeep","family":"Yarlagadda","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shreyank","family":"Jyoti","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shyamgopal","family":"Karthik","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Ramanathan","family":"Subramanian","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Vineet","family":"Gandhi","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref39","first-page":"1","article-title":"Fixation prediction through multimodal analysis","volume":"13","author":"min","year":"2016","journal-title":"ACM Transactions on Multimedia Computing Communications and Applications (TOMM)"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2014.2329380"},{"key":"ref33","first-page":"744","volume":"69","author":"vatakis","year":"2007"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1037\/0096-1523.34.5.1053"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1038\/264746a0"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2017.2777665"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2011.6095124"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2008.4543329"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1007\/978-1-4939-3435-5_16"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1167\/14.8.5"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2017.2787612"},{"key":"ref27","article-title":"Video saliency detection with domain adaption using hierarchical gradient reversal layers","author":"bellitto","year":"2020"},{"key":"ref29","article-title":"Recurrent mixture density network for spatiotemporal visual attention","author":"bazzani","year":"2016"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/ROBOT.2008.4543572"},{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00482"},{"key":"ref20","article-title":"Dave: A deep audio-visual embedding for dynamic saliency prediction","author":"tavakoli","year":"2019"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206557"},{"key":"ref21","article-title":"Soundnet: Learning sound representations from unlabeled video","author":"aytar","year":"2016","journal-title":"NeurIPS"},{"key":"ref24","article-title":"Predicting video saliency with object-to-motion cnn and two-layer convolutional lstm","author":"jiang","year":"2017"},{"key":"ref23","article-title":"Video saliency prediction using enhanced spatiotemporal alignment network","author":"chen","year":"2020"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9341574"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7298710"},{"key":"ref50","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2008.4587727"},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1007\/s12559-010-9074-z"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2018.2815601"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-10584-0_33"},{"key":"ref52","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2015.08.004"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1145\/2502081.2502128"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1145\/3313831.3376544"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2020.2966082"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00514"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58558-7_25"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v34i07.6927"},{"key":"ref15","article-title":"Simple vs complex temporal recurrences for video saliency prediction","author":"linardos","year":"2019"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00248"},{"key":"ref17","article-title":"U-net: Convolutional networks for biomedical image segmentation","author":"ronneberger","year":"2015","journal-title":"MICCAI"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1167\/8.5.2"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.16910\/jemr.5.4.2"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1007\/s12369-012-0174-7"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/TAMD.2014.2303072"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2017.327"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW.2019.00148"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2013.2282897"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2019.2936112"},{"key":"ref7","first-page":"193","article-title":"Visual saliency based object tracking","author":"zhang","year":"2009","journal-title":"Asian Conference on Computer Vision"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1016\/j.image.2013.07.003"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01216-8_16"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01231-1_39"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2924417"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01267-0_19"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01246-5_27"},{"key":"ref41","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00109"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00458"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.73"}],"event":{"name":"2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)","location":"Prague, Czech Republic","start":{"date-parts":[[2021,9,27]]},"end":{"date-parts":[[2021,10,1]]}},"container-title":["2021 IEEE\/RSJ International Conference on Intelligent Robots and Systems (IROS)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9635848\/9635849\/09635989.pdf?arnumber=9635989","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,10]],"date-time":"2022-05-10T16:54:32Z","timestamp":1652201672000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9635989\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,9,27]]},"references-count":54,"URL":"https:\/\/doi.org\/10.1109\/iros51168.2021.9635989","relation":{},"subject":[],"published":{"date-parts":[[2021,9,27]]}}}