{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,5]],"date-time":"2026-06-05T04:53:40Z","timestamp":1780635220293,"version":"3.54.1"},"reference-count":62,"publisher":"Institute of Electrical and Electronics Engineers (IEEE)","issue":"12","license":[{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/ieeexplore.ieee.org\/Xplorehelp\/downloads\/license-information\/IEEE.html"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,12,1]],"date-time":"2024-12-01T00:00:00Z","timestamp":1733011200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"funder":[{"DOI":"10.13039\/100010684","name":"European Union Horizon 2020 Teaming, KIOS CoE","doi-asserted-by":"publisher","award":["739551"],"award-info":[{"award-number":["739551"]}],"id":[{"id":"10.13039\/100010684","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":["IEEE Trans. Artif. Intell."],"published-print":{"date-parts":[[2024,12]]},"DOI":"10.1109\/tai.2024.3454566","type":"journal-article","created":{"date-parts":[[2024,9,5]],"date-time":"2024-09-05T14:55:41Z","timestamp":1725548141000},"page":"6159-6171","source":"Crossref","is-referenced-by-count":4,"title":["Spatiotemporal Object Detection for Improved Aerial Vehicle Detection in Traffic Monitoring"],"prefix":"10.1109","volume":"5","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-9563-5374","authenticated-orcid":false,"given":"Kristina","family":"Telegraph","sequence":"first","affiliation":[{"name":"KIOS Research and Innovation Center of Excellence, University of Cyprus, Nicosia, Cyprus"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7926-7642","authenticated-orcid":false,"given":"Christos","family":"Kyrkou","sequence":"additional","affiliation":[{"name":"KIOS Research and Innovation Center of Excellence, University of Cyprus, Nicosia, Cyprus"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"263","reference":[{"key":"ref1","first-page":"580","article-title":"Rich feature hierarchies for accurate object detection and semantic segmentation","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit.","author":"Girshick","year":"2014"},{"key":"ref2","first-page":"1440","article-title":"Fast R-CNN","volume-title":"Proc. IEEE Int. Conf. Comput. Vis. (ICCV)","author":"Girshick","year":"2015"},{"key":"ref3","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","volume":"39","author":"Ren","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"ref4","first-page":"779","article-title":"You only look once: Unified, real-time object detection","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit. (CVPR)","author":"Redmon","year":"2016"},{"key":"ref5","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2017.690","article-title":"YOLO9000: Better, faster, stronger","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit. (CVPR)","author":"Redmon","year":"2017"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.4324\/9780203978948-13"},{"key":"ref7","article-title":"YOLOv4: Optimal speed and accuracy of object detection","author":"Bochkovskiy","year":"2020"},{"key":"ref8","article-title":"YOLOv5 by ultralytics","author":"Jocher","year":"2020"},{"key":"ref9","article-title":"YOLOv6: A single-stage object detection framework for industrial applications","author":"Li","year":"2022"},{"key":"ref10","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.3390\/make5040083"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.48047\/ijfans\/v11\/i12\/207"},{"key":"ref14","first-page":"26183","article-title":"You only look at one sequence: Rethinking transformer in vision through object detection","volume":"34","author":"Fang","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"ref15","article-title":"Siamese learning visual tracking: A survey","author":"Pflugfelder","year":"2017"},{"key":"ref16","article-title":"Seq-NMS for video object detection","author":"Han","year":"2016"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.101"},{"key":"ref18","doi-asserted-by":"crossref","first-page":"2896","DOI":"10.1109\/TCSVT.2017.2736553","article-title":"T-CNN: Tubelets with convolutional neural networks for object detection from videos","volume":"28","author":"Kang","year":"2018","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.52"},{"key":"ref20","doi-asserted-by":"crossref","DOI":"10.1109\/CVPR.2017.441","article-title":"Deep feature flow for video recognition","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit. (CVPR)","author":"Zhu","year":"2017"},{"key":"ref21","article-title":"Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting","author":"Shi","year":"2015"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.257"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1145\/3243394.3243692"},{"key":"ref24","doi-asserted-by":"publisher","DOI":"10.1109\/WACV51458.2022.00349"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/ICCE.2018.8326145"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/MPOT.2018.2850386"},{"key":"ref27","first-page":"21","article-title":"SSD: Single shot multibox Detect.","author":"Liu","year":"2016","journal-title":"Comput. Vis. \u2013 ECCV"},{"key":"ref28","article-title":"Stand-alone self-attention in vision models","volume":"32","author":"Ramachandran","year":"2019","journal-title":"Adv. Neur. Inf. Process. Syst."},{"key":"ref29","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00338"},{"key":"ref30","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-023-10595-0"},{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1109\/TNNLS.2021.3053249"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1997.9.8.1735"},{"key":"ref33","first-page":"5686","article-title":"Mobile video object detection with temporally-aware feature maps","volume-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit.","author":"Liu","year":"2018"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/ICIP.2019.8802920"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00678"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01309"},{"issue":"1","key":"ref37","doi-asserted-by":"crossref","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","article-title":"3D convolutional neural networks for human action recognition","volume":"35","author":"Ji","year":"2013","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"ref38","doi-asserted-by":"crossref","DOI":"10.1109\/ICCV.2015.510","article-title":"Learning spatiotemporal features with 3D convolutional networks","volume-title":"Proc. IEEE Int. Conf. Comput. Vis. (ICCV)","author":"Tran","year":"2015"},{"key":"ref39","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2020.3029799"},{"key":"ref40","doi-asserted-by":"publisher","DOI":"10.1016\/j.compenvurbsys.2021.101754"},{"key":"ref41","first-page":"4003","article-title":"ClusterNet: Detecting small objects in large scenes by exploiting spatio-temporal information","volume-title":"Proc. IEEE\/CVF Conf. Comput. Vis. Pattern Recognit.","author":"LaLonde","year":"2018"},{"key":"ref42","first-page":"1","article-title":"Exploiting temporal context for tiny object detection","volume-title":"Proc. IEEE\/CVF Winter Conf. Appl. Comput. Vis. Workshops (WACVW)","author":"Corsel","year":"2023"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2021.03.091"},{"key":"ref44","doi-asserted-by":"publisher","DOI":"10.1007\/s41095-022-0271-y"},{"key":"ref45","article-title":"Recurrent models of visual attention","author":"Mnih","year":"2014"},{"key":"ref46","article-title":"Spatial transformer networks","volume":"28","author":"Jaderberg","year":"2015","journal-title":"Advances in neural Inf. Process. Syst."},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2019.2913372"},{"key":"ref48","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01155"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.48550\/ARXIV.1807.06521"},{"key":"ref50","first-page":"5998","article-title":"Attention is all you need","volume":"30","author":"Vaswani","year":"2017","journal-title":"Neural Inf. Process. Syst."},{"key":"ref51","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00813"},{"key":"ref52","article-title":"An image is worth 16x16 words: Transformers for image recognition at scale","author":"Dosovitskiy","year":"2020"},{"key":"ref53","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00061"},{"key":"ref54","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2102.05095"},{"key":"ref56","first-page":"6816","article-title":"Vivit: A video vision transformer","volume-title":"Proc. IEEE\/CVF Int. Conf. Comput. Vis. (ICCV)","author":"Arnab","year":"2021"},{"key":"ref57","doi-asserted-by":"publisher","DOI":"10.1109\/ICCVW54120.2021.00319"},{"key":"ref58","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2022.3168279"},{"key":"ref59","doi-asserted-by":"publisher","DOI":"10.1109\/ICUAS57906.2023.10156376"},{"key":"ref60","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW50498.2020.00203"},{"key":"ref61","doi-asserted-by":"publisher","DOI":"10.3390\/electronics10030279"},{"key":"ref62","doi-asserted-by":"publisher","DOI":"10.1109\/IWSSIP48289.2020.9145130"}],"container-title":["IEEE Transactions on Artificial Intelligence"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/9078688\/10794552\/10666729.pdf?arnumber=10666729","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,8,23]],"date-time":"2025-08-23T01:09:27Z","timestamp":1755911367000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/10666729\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12]]},"references-count":62,"journal-issue":{"issue":"12"},"URL":"https:\/\/doi.org\/10.1109\/tai.2024.3454566","relation":{},"ISSN":["2691-4581"],"issn-type":[{"value":"2691-4581","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,12]]}}}