{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,22]],"date-time":"2026-04-22T20:01:39Z","timestamp":1776888099266,"version":"3.51.2"},"reference-count":49,"publisher":"IEEE","license":[{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2023,5,29]],"date-time":"2023-05-29T00:00:00Z","timestamp":1685318400000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2023,5,29]]},"DOI":"10.1109\/icra48891.2023.10160564","type":"proceedings-article","created":{"date-parts":[[2023,7,4]],"date-time":"2023-07-04T13:20:56Z","timestamp":1688476856000},"page":"1312-1318","source":"Crossref","is-referenced-by-count":18,"title":["AZTR: Aerial Video Action Recognition with Auto Zoom and Temporal Reasoning"],"prefix":"10.1109","author":[{"given":"Xijun","family":"Wang","sequence":"first","affiliation":[{"name":"University of Maryland, College Park,Dept. of Computer Science,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Ruiqi","family":"Xian","sequence":"additional","affiliation":[{"name":"University of Maryland, College Park,Dept. of Electrical and Computer Engineering,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Tianrui","family":"Guan","sequence":"additional","affiliation":[{"name":"University of Maryland, College Park,Dept. of Computer Science,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Celso M.","family":"de Melo","sequence":"additional","affiliation":[{"name":"Adelphi,Computational and Information Sciences Directorate, DEVCOM U.S. 
Army Research Laboratory,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Stephen M.","family":"Nogar","sequence":"additional","affiliation":[{"name":"Adelphi,Computational and Information Sciences Directorate, DEVCOM U.S. Army Research Laboratory,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Aniket","family":"Bera","sequence":"additional","affiliation":[{"name":"Purdue University,Dept. of Computer Science,West Lafayette,IN,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dinesh","family":"Manocha","sequence":"additional","affiliation":[{"name":"University of Maryland, College Park,Dept. of Computer Science,MD,USA"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2019.107140"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1109\/ACCESS.2019.2938249"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1109\/IGARSS39084.2020.9324182"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1016\/j.comcom.2020.03.012"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2016.7487305"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8460516"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.3390\/drones3040082"},{"key":"ref16","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2017.267"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093511"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/THMS.2020.2971958"},{"key":"ref46","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01117"},{"key":"ref45","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00474"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2013.396"},{"key":"ref47","article-title":"Fnet: Mixing tokens with fourier transforms","author":"lee-thorp","year":"2021","journal-title":"ArXiv 
Preprint"},{"key":"ref42","first-page":"249","article-title":"Walle: An End-to-End, General-Purpose, and Large-Scale production system for Device-Cloud collaborative machine learning","author":"lv","year":"2022","journal-title":"16th USENIX Symposium on Operating Systems Design and Implementation (OSDI 22)"},{"key":"ref41","author":"abadi","year":"2015","journal-title":"TensorFlow Large-Scale Machine Learning on Heterogeneous Systems"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"ref43","article-title":"Rocog-v2","author":"reddy","year":"2022","journal-title":"Under Review"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.368"},{"key":"ref8","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9196856"},{"key":"ref7","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197301"},{"key":"ref9","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811995"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9412541"},{"key":"ref3","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01576"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA40945.2020.9197393"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2017.7989361"},{"key":"ref40","first-page":"4651","article-title":"Perceiver: General perception with iterative attention","author":"jaegle","year":"0","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.91"},{"key":"ref34","article-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications","author":"howard","year":"2017","journal-title":"ArXiv Preprint"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1002\/ail2.38"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/ICECE51594.2020.9353008"},{"key":"ref31","article-title":"Big-little net: An efficient multi-scale feature 
representation for visual and speech recognition","author":"chen","year":"2018","journal-title":"ArXiv Preprint"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00044"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00565"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00117"},{"key":"ref2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20083-0_18"},{"key":"ref1","article-title":"The state of aerial surveillance: A survey","author":"nguyen","year":"2022","journal-title":"ArXiv Preprint"},{"key":"ref39","article-title":"Attention is all you need","volume":"30","author":"vaswani","year":"2017","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00718"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19836-6_37"},{"key":"ref23","first-page":"276","article-title":"Fully autonomous uav-based action recognition system using aerial imagery","author":"peng","year":"0","journal-title":"International Symposium on Visual Computing (ISVC)"},{"key":"ref26","article-title":"Detection and tracking meet drones challenge","author":"zhu","year":"2020","journal-title":"ArXiv Preprint"},{"key":"ref25","first-page":"370","article-title":"The unmanned aerial vehicle benchmark: Object detection and tracking","author":"du","year":"0","journal-title":"European Conference on Computer Vision (ECCV)"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01600"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2021.103186"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.1109\/IROS45743.2020.9340728"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-11012-3_9"},{"key":"ref29","article-title":"More is less: Learning efficient video representations by 
big-little network and depthwise temporal aggregation","volume":"32","author":"fan","year":"2019","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"2023 IEEE International Conference on Robotics and Automation (ICRA)","location":"London, United Kingdom","start":{"date-parts":[[2023,5,29]]},"end":{"date-parts":[[2023,6,2]]}},"container-title":["2023 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/10160211\/10160212\/10160564.pdf?arnumber=10160564","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,7,24]],"date-time":"2023-07-24T13:37:24Z","timestamp":1690205844000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10160564\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,5,29]]},"references-count":49,"URL":"https:\/\/doi.org\/10.1109\/icra48891.2023.10160564","relation":{},"subject":[],"published":{"date-parts":[[2023,5,29]]}}}