{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,24]],"date-time":"2025-12-24T20:54:58Z","timestamp":1766609698958},"reference-count":47,"publisher":"IEEE","license":[{"start":{"date-parts":[[2020,3,1]],"date-time":"2020-03-01T00:00:00Z","timestamp":1583020800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2020,3,1]],"date-time":"2020-03-01T00:00:00Z","timestamp":1583020800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2020,3,1]],"date-time":"2020-03-01T00:00:00Z","timestamp":1583020800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2020,3]]},"DOI":"10.1109\/wacv45572.2020.9093617","type":"proceedings-article","created":{"date-parts":[[2020,5,15]],"date-time":"2020-05-15T03:41:09Z","timestamp":1589514069000},"page":"516-525","source":"Crossref","is-referenced-by-count":37,"title":["Actor Conditioned Attention Maps for Video Action Detection"],"prefix":"10.1109","author":[{"given":"Oytun","family":"Ulutan","sequence":"first","affiliation":[]},{"given":"Swati","family":"Rallapalli","sequence":"additional","affiliation":[]},{"given":"Mudhakar","family":"Srivatsa","sequence":"additional","affiliation":[]},{"given":"Carlos","family":"Torres","sequence":"additional","affiliation":[]},{"given":"B.S.","family":"Manjunath","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1037\/0033-295X.113.4.766"},{"key":"ref33","first-page":"510","article-title":"Hollywood in homes: Crowdsourcing data collection for activity understanding","author":"sigurdsson","year":"2016","journal-title":"European Conference on Computer Vision"},{"key":"ref32","first-page":"4967","article-title":"A simple neural network module for relational reasoning","author":"santoro","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"ref30","article-title":"Deep learning for detecting multiple space-time action tubes in videos","author":"saha","year":"2016","journal-title":"arXiv preprint arXiv 1608 01529"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01252-6_20"},{"key":"ref36","article-title":"Ucf101: A dataset of 101 human actions classes from videos in the wild","author":"soomro","year":"2012","journal-title":"arXiv preprint arXiv 1212 0402"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.393"},{"key":"ref34","first-page":"568","article-title":"Two-stream convolutional networks for action recognition in videos","author":"simonyan","year":"2014","journal-title":"Advances in neural information processing systems"},{"key":"ref10","article-title":"Visual semantic role labeling","author":"gupta","year":"2015","journal-title":"arXiv preprint arXiv 1505 06270"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00132"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1016\/j.bandc.2007.05.001"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.620"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00378"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.351"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.396"},{"journal-title":"Human centric spatiotemporal action localization","year":"0","author":"jiang","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.472"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2014.223"},{"key":"ref19","article-title":"The kinetics human action video dataset","author":"kay","year":"2017","journal-title":"arXiv preprint arXiv 1705 06950"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1523\/JNEUROSCI.5840-12.2013"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1016\/j.visres.2012.12.011"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46493-0_45"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/WACV.2018.00048"},{"key":"ref6","article-title":"The activitynet large-scale activity recognition challenge 2018 summary","author":"ghanem","year":"2018","journal-title":"arXiv preprint arXiv 1808 02194"},{"key":"ref29","first-page":"91","article-title":"Faster r-cnn: Towards real-time object detection with region proposal networks","author":"ren","year":"2015","journal-title":"Advances in neural information processing systems"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"ref8","article-title":"Detecting and recognizing human-object interactions","author":"gkioxari","year":"2017","journal-title":"arXiv preprint arXiv 1704 07333"},{"key":"ref7","article-title":"A better baseline for ava","author":"girdhar","year":"2018","journal-title":"arXiv preprint arXiv 1807 10066"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00633"},{"journal-title":"TensorFlow Large-Scale Machine Learning on Heterogeneous Systems","year":"2015","author":"abadi","key":"ref1"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.319"},{"key":"ref20","article-title":"Adam: A method for stochastic optimization","author":"kingma","year":"2014","journal-title":"arXiv preprint arXiv 1412 6980"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2015.7299101"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2016.2642789"},{"journal-title":"Learning Transferable Architectures for Scalable Image Recognition","year":"0","author":"zoph","key":"ref47"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"ref24","first-page":"21","article-title":"Ssd: Single shot multibox detector","author":"liu","year":"2016","journal-title":"European Conference on Computer Vision"},{"key":"ref41","first-page":"5998","article-title":"Attention is all you need","author":"vaswani","year":"2017","journal-title":"Advances in neural information processing systems"},{"key":"ref23","first-page":"740","article-title":"Microsoft coco: Common objects in context","author":"lin","year":"2014","journal-title":"European Conference on Computer Vision"},{"key":"ref44","article-title":"Yh technologies at activitynet challenge 2018","author":"yao","year":"2018","journal-title":"arXiv preprint arXiv 1807 00686"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2011.5995586"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2017.8296962"},{"key":"ref25","article-title":"Sgdr: Stochastic gradient descent with warm restarts","author":"loshchilov","year":"2016","journal-title":"arXiv preprint arXiv 1608 03983"}],"event":{"name":"2020 IEEE Winter Conference on Applications of Computer Vision (WACV)","start":{"date-parts":[[2020,3,1]]},"location":"Snowmass Village, CO, USA","end":{"date-parts":[[2020,3,5]]}},"container-title":["2020 IEEE Winter Conference on Applications of Computer Vision (WACV)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/9087828\/9093261\/09093617.pdf?arnumber=9093617","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,6,30]],"date-time":"2022-06-30T15:18:24Z","timestamp":1656602304000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/9093617\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,3]]},"references-count":47,"URL":"https:\/\/doi.org\/10.1109\/wacv45572.2020.9093617","relation":{},"subject":[],"published":{"date-parts":[[2020,3]]}}}