{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T19:59:08Z","timestamp":1776196748624,"version":"3.50.1"},"reference-count":92,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62376252"],"award-info":[{"award-number":["62376252"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100004731","name":"Natural Science Foundation of Zhejiang Province","doi-asserted-by":"publisher","award":["LZ22F030003"],"award-info":[{"award-number":["LZ22F030003"]}],"id":[{"id":"10.13039\/501100004731","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Advanced Engineering Informatics"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.aei.2026.104639","type":"journal-article","created":{"date-parts":[[2026,3,29]],"date-time":"2026-03-29T13:18:29Z","timestamp":1774790309000},"page":"104639","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PA","title":["IPeDet: An end-to-end fine-grained feature aggregation network for UAV infrared pedestrian detection"],"prefix":"10.1016","volume":"74","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-9465-0869","authenticated-orcid":false,"given":"Yi","family":"Li","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6704-0301","authenticated-orcid":false,"given":"Huiying","family":"Xu","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0033-5260","authenticated-orcid":false,"given":"Xinzhong","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Hongbo","family":"Li","sequence":"additional","affiliation":[]},{"given":"Yiming","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Ruidong","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0000-7918-9106","authenticated-orcid":false,"given":"Lingling","family":"Xu","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.aei.2026.104639_b1","first-page":"1224","article-title":"Prediction of stopping distance for autonomous emergency braking using stereo camera pedestrian detection","volume":"51","author":"Rajendar","year":"2022","journal-title":"Mater. Today: Proc."},{"key":"10.1016\/j.aei.2026.104639_b2","doi-asserted-by":"crossref","DOI":"10.1016\/j.measurement.2022.111418","article-title":"A method for detecting pedestrian height and distance based on monocular vision technology","volume":"199","author":"Shi","year":"2022","journal-title":"Measurement"},{"key":"10.1016\/j.aei.2026.104639_b3","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2021.101356","article-title":"Recognition of pedestrian trajectories and attributes with computer vision and deep learning techniques","volume":"49","author":"Wong","year":"2021","journal-title":"Adv. Eng. Informatics"},{"key":"10.1016\/j.aei.2026.104639_b4","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2024.102953","article-title":"UAV applications in intelligent traffic: RGBT image feature registration and complementary perception","volume":"63","author":"Ji","year":"2025","journal-title":"Adv. Eng. Informatics"},{"key":"10.1016\/j.aei.2026.104639_b5","doi-asserted-by":"crossref","first-page":"108","DOI":"10.1016\/j.patrec.2025.04.015","article-title":"Cross-erasure enhanced network for occluded person re-identification","volume":"193","author":"Zhang","year":"2025","journal-title":"Pattern Recognit. Lett."},{"key":"10.1016\/j.aei.2026.104639_b6","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109819","article-title":"Haar wavelet downsampling: A simple but effective downsampling module for semantic segmentation","volume":"143","author":"Xu","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.aei.2026.104639_b7","series-title":"2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition","first-page":"886","article-title":"Histograms of oriented gradients for human detection","volume":"Vol. 1","author":"Dalal","year":"2005"},{"key":"10.1016\/j.aei.2026.104639_b8","series-title":"Proceedings of 12th International Conference on Pattern Recognition","first-page":"582","article-title":"Performance evaluation of texture measures with classification based on Kullback discrimination of distributions","volume":"Vol. 1","author":"Ojala","year":"1994"},{"key":"10.1016\/j.aei.2026.104639_b9","series-title":"Proceedings of the Seventh IEEE International Conference on Computer Vision","first-page":"1150","article-title":"Object recognition from local scale-invariant features","volume":"Vol. 2","author":"Lowe","year":"1999"},{"key":"10.1016\/j.aei.2026.104639_b10","series-title":"2018 24th International Conference on Pattern Recognition","first-page":"2705","article-title":"Infrared and visible image fusion using a deep learning framework","author":"Li","year":"2018"},{"key":"10.1016\/j.aei.2026.104639_b11","series-title":"ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"4468","article-title":"Robust thermal infrared pedestrian detection by associating visible pedestrian knowledge","author":"Park","year":"2022"},{"issue":"10","key":"10.1016\/j.aei.2026.104639_b12","doi-asserted-by":"crossref","first-page":"3333","DOI":"10.1109\/TPAMI.2020.2984244","article-title":"Deep convolutional neural network for multi-modal image restoration and fusion","volume":"43","author":"Deng","year":"2020","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"2","key":"10.1016\/j.aei.2026.104639_b13","doi-asserted-by":"crossref","first-page":"87","DOI":"10.1109\/MGRS.2022.3145502","article-title":"Single-frame infrared small-target detection: A survey","volume":"10","author":"Zhao","year":"2022","journal-title":"IEEE Geosci. Remote. Sens. Mag."},{"key":"10.1016\/j.aei.2026.104639_b14","first-page":"1","article-title":"SCAFNet: A semantic compensated adaptive fusion network for remote sensing images change detection","volume":"23","author":"Zhang","year":"2026","journal-title":"IEEE Geosci. Remote. Sens. Lett."},{"key":"10.1016\/j.aei.2026.104639_b15","first-page":"1","article-title":"Adaptive downsampling and scale enhanced detection head for tiny object detection in remote sensing image","volume":"22","author":"Zhang","year":"2025","journal-title":"IEEE Geosci. Remote. Sens. Lett."},{"key":"10.1016\/j.aei.2026.104639_b16","first-page":"1","article-title":"Real-time object detection network in UAV-vision based on CNN and transformer","volume":"72","author":"Ye","year":"2023","journal-title":"IEEE Trans. Instrum. Meas."},{"key":"10.1016\/j.aei.2026.104639_b17","first-page":"1","article-title":"YoloOW: A spatial scale adaptive real-time object detection neural network for open water search and rescue from UAV aerial imagery","volume":"62","author":"Xu","year":"2024","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.aei.2026.104639_b18","first-page":"1","article-title":"EFLNet: Enhancing feature learning network for infrared small target detection","volume":"62","author":"Yang","year":"2024","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.aei.2026.104639_b19","first-page":"1","article-title":"SDS-Net: Shallow\u2013deep synergism-detection network for infrared small target detection","volume":"63","author":"Yue","year":"2025","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"issue":"6","key":"10.1016\/j.aei.2026.104639_b20","doi-asserted-by":"crossref","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","article-title":"Faster R-CNN: Towards real-time object detection with region proposal networks","volume":"39","author":"Ren","year":"2016","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.aei.2026.104639_b21","doi-asserted-by":"crossref","unstructured":"T.-Y. Lin, P. Doll\u00e1r, R. Girshick, K. He, B. Hariharan, S. Belongie, Feature pyramid networks for object detection, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 2117\u20132125.","DOI":"10.1109\/CVPR.2017.106"},{"key":"10.1016\/j.aei.2026.104639_b22","doi-asserted-by":"crossref","unstructured":"K. He, G. Gkioxari, P. Doll\u00e1r, R. Girshick, Mask r-cnn, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 2961\u20132969.","DOI":"10.1109\/ICCV.2017.322"},{"key":"10.1016\/j.aei.2026.104639_b23","series-title":"Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, the Netherlands, October 11\u201314, 2016, ProceediFngs, Part I 14","first-page":"21","article-title":"Ssd: Single shot multibox detector","author":"Liu","year":"2016"},{"key":"10.1016\/j.aei.2026.104639_b24","doi-asserted-by":"crossref","unstructured":"T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Doll\u00e1r, Focal loss for dense object detection, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 2980\u20132988.","DOI":"10.1109\/ICCV.2017.324"},{"key":"10.1016\/j.aei.2026.104639_b25","doi-asserted-by":"crossref","unstructured":"K. Duan, S. Bai, L. Xie, H. Qi, Q. Huang, Q. Tian, Centernet: Keypoint triplets for object detection, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2019, pp. 6569\u20136578.","DOI":"10.1109\/ICCV.2019.00667"},{"key":"10.1016\/j.aei.2026.104639_b26","doi-asserted-by":"crossref","unstructured":"M. Tan, R. Pang, Q.V. Le, Efficientdet: Scalable and efficient object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 10781\u201310790.","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"10.1016\/j.aei.2026.104639_b27","doi-asserted-by":"crossref","unstructured":"J. Redmon, You only look once: Unified, real-time object detection, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2016.","DOI":"10.1109\/CVPR.2016.91"},{"key":"10.1016\/j.aei.2026.104639_b28","series-title":"Yolov11: An overview of the key architectural enhancements","author":"Khanam","year":"2024"},{"key":"10.1016\/j.aei.2026.104639_b29","series-title":"Yolov12: Attention-centric real-time object detectors","author":"Tian","year":"2025"},{"key":"10.1016\/j.aei.2026.104639_b30","article-title":"Lraf-net: Long-range attention fusion network for visible\u2013infrared object detection","author":"Fu","year":"2023","journal-title":"IEEE Trans. Neural Networks Learn. Syst."},{"key":"10.1016\/j.aei.2026.104639_b31","doi-asserted-by":"crossref","DOI":"10.1016\/j.measurement.2023.113442","article-title":"Infrared pedestrian detection using improved UNet and YOLO through sharing visible light domain information","volume":"221","author":"Wei","year":"2023","journal-title":"Measurement"},{"key":"10.1016\/j.aei.2026.104639_b32","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.125291","article-title":"Implicit modality knowledge alignment and uncertainty estimation for visible-infrared person re-identification","volume":"259","author":"Wu","year":"2025","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.aei.2026.104639_b33","doi-asserted-by":"crossref","DOI":"10.1016\/j.sigpro.2024.109620","article-title":"Pedestrian detection-driven cascade network for infrared and visible image fusion","volume":"225","author":"Zheng","year":"2024","journal-title":"Signal Process."},{"key":"10.1016\/j.aei.2026.104639_b34","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2021.107089","article-title":"Infrared pedestrian segmentation algorithm based on the two-dimensional Kaniadakis entropy thresholding","volume":"225","author":"Lei","year":"2021","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.aei.2026.104639_b35","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2022.109337","article-title":"Cross-modality disentanglement and shared feedback learning for infrared-visible person re-identification","volume":"252","author":"Li","year":"2022","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.aei.2026.104639_b36","doi-asserted-by":"crossref","DOI":"10.1016\/j.measurement.2025.118009","article-title":"Pedestrian detection and tracking using an enhanced YOLOv9 model for automotive vehicles","author":"Farhat","year":"2025","journal-title":"Measurement"},{"issue":"4","key":"10.1016\/j.aei.2026.104639_b37","doi-asserted-by":"crossref","first-page":"834","DOI":"10.1109\/TPAMI.2017.2699184","article-title":"Deeplab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected crfs","volume":"40","author":"Chen","year":"2017","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.aei.2026.104639_b38","doi-asserted-by":"crossref","unstructured":"H. Zhao, J. Shi, X. Qi, X. Wang, J. Jia, Pyramid scene parsing network, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 2881\u20132890.","DOI":"10.1109\/CVPR.2017.660"},{"key":"10.1016\/j.aei.2026.104639_b39","doi-asserted-by":"crossref","unstructured":"S. Liu, L. Qi, H. Qin, J. Shi, J. Jia, Path aggregation network for instance segmentation, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 8759\u20138768.","DOI":"10.1109\/CVPR.2018.00913"},{"key":"10.1016\/j.aei.2026.104639_b40","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.128564","article-title":"Spatiotemporal dual-branch feature-guided fusion network for driver attention prediction","volume":"292","author":"Zhang","year":"2025","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.aei.2026.104639_b41","series-title":"Mobilenets: Efficient convolutional neural networks for mobile vision applications","author":"Howard","year":"2017"},{"key":"10.1016\/j.aei.2026.104639_b42","doi-asserted-by":"crossref","unstructured":"X. Zhang, X. Zhou, M. Lin, J. Sun, Shufflenet: An extremely efficient convolutional neural network for mobile devices, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 6848\u20136856.","DOI":"10.1109\/CVPR.2018.00716"},{"key":"10.1016\/j.aei.2026.104639_b43","doi-asserted-by":"crossref","unstructured":"K. Han, Y. Wang, Q. Tian, J. Guo, C. Xu, C. Xu, Ghostnet: More features from cheap operations, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 1580\u20131589.","DOI":"10.1109\/CVPR42600.2020.00165"},{"key":"10.1016\/j.aei.2026.104639_b44","doi-asserted-by":"crossref","unstructured":"J. Chen, S.-h. Kao, H. He, W. Zhuo, S. Wen, C.-H. Lee, S.-H.G. Chan, Run, don\u2019t walk: Chasing higher FLOPS for faster neural networks, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 12021\u201312031.","DOI":"10.1109\/CVPR52729.2023.01157"},{"key":"10.1016\/j.aei.2026.104639_b45","series-title":"Mobilevit: light-weight, general-purpose, and mobile-friendly vision transformer","author":"Mehta","year":"2021"},{"key":"10.1016\/j.aei.2026.104639_b46","doi-asserted-by":"crossref","unstructured":"H. He, J. Zhang, Y. Cai, H. Chen, X. Hu, Z. Gan, Y. Wang, C. Wang, Y. Wu, L. Xie, MobileMamba: Lightweight Multi-Receptive Visual Mamba Network, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2025, pp. 4497\u20134507.","DOI":"10.1109\/CVPR52734.2025.00424"},{"key":"10.1016\/j.aei.2026.104639_b47","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1109\/TGRS.2024.3494868","article-title":"A lightweight CNN\u2013transformer network with Laplacian loss for low-altitude UAV imagery semantic segmentation","volume":"62","author":"Lu","year":"2024","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.aei.2026.104639_b48","doi-asserted-by":"crossref","DOI":"10.1016\/j.aei.2025.103544","article-title":"AirboardNet: A UAV onboard girder inspection approach for high-speed railroad bridge using multi-task knowledge distillation","volume":"67","author":"Wu","year":"2025","journal-title":"Adv. Eng. Informatics"},{"issue":"1","key":"10.1016\/j.aei.2026.104639_b49","doi-asserted-by":"crossref","first-page":"306","DOI":"10.1109\/TCC.2024.3361858","article-title":"Edge-cloud collaborative UAV object detection: Edge-embedded lightweight algorithm design and task offloading using fuzzy neural network","volume":"12","author":"Yuan","year":"2024","journal-title":"IEEE Trans. Cloud Comput."},{"key":"10.1016\/j.aei.2026.104639_b50","first-page":"1","article-title":"A lightweight network with latent representations for UAV thermal image super-resolution","volume":"62","author":"Sang","year":"2024","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.aei.2026.104639_b51","first-page":"1","article-title":"Self-attention guidance and multiscale feature fusion-based UAV image object detection","volume":"20","author":"Zhang","year":"2023","journal-title":"IEEE Geosci. Remote. Sens. Lett."},{"key":"10.1016\/j.aei.2026.104639_b52","doi-asserted-by":"crossref","unstructured":"Y. Zhao, W. Lv, S. Xu, J. Wei, G. Wang, Q. Dang, Y. Liu, J. Chen, Detrs beat yolos on real-time object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 16965\u201316974.","DOI":"10.1109\/CVPR52733.2024.01605"},{"key":"10.1016\/j.aei.2026.104639_b53","first-page":"1","article-title":"ORSI salient object detection via progressive interaction and saliency-guided enhancement","volume":"23","author":"Zhang","year":"2026","journal-title":"IEEE Geosci. Remote. Sens. Lett."},{"key":"10.1016\/j.aei.2026.104639_b54","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2023.121352","article-title":"Large separable kernel attention: Rethinking the large kernel attention design in cnn","volume":"236","author":"Lau","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.aei.2026.104639_b55","doi-asserted-by":"crossref","unstructured":"M. Hu, J. Feng, J. Hua, B. Lai, J. Huang, X. Gong, X.-S. Hua, Online convolutional re-parameterization, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 568\u2013577.","DOI":"10.1109\/CVPR52688.2022.00065"},{"key":"10.1016\/j.aei.2026.104639_b56","article-title":"How does batch normalization help optimization?","volume":"31","author":"Santurkar","year":"2018","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.aei.2026.104639_b57","doi-asserted-by":"crossref","unstructured":"Q. Hou, D. Zhou, J. Feng, Coordinate attention for efficient mobile network design, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 13713\u201313722.","DOI":"10.1109\/CVPR46437.2021.01350"},{"issue":"1","key":"10.1016\/j.aei.2026.104639_b58","doi-asserted-by":"crossref","first-page":"227","DOI":"10.1038\/s41597-023-02066-6","article-title":"HIT-UAV: A high-altitude infrared thermal dataset for unmanned aerial vehicle-based object detection","volume":"10","author":"Suo","year":"2023","journal-title":"Sci. Data"},{"key":"10.1016\/j.aei.2026.104639_b59","doi-asserted-by":"crossref","unstructured":"J. Liu, X. Fan, Z. Huang, G. Wu, R. Liu, W. Zhong, Z. Luo, Target-aware dual adversarial learning and a multi-scenario multi-modality benchmark to fuse infrared and visible for object detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 5802\u20135811.","DOI":"10.1109\/CVPR52688.2022.00571"},{"key":"10.1016\/j.aei.2026.104639_b60","series-title":"YOLOv5 by ultralytics","author":"Jocher","year":"2020"},{"key":"10.1016\/j.aei.2026.104639_b61","series-title":"YOLOv6: A single-stage object detection framework for industrial applications","author":"Li","year":"2022"},{"key":"10.1016\/j.aei.2026.104639_b62","doi-asserted-by":"crossref","unstructured":"C.-Y. Wang, A. Bochkovskiy, H.-Y.M. Liao, YOLOv7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 7464\u20137475.","DOI":"10.1109\/CVPR52729.2023.00721"},{"key":"10.1016\/j.aei.2026.104639_b63","series-title":"Ultralytics YOLO","author":"Jocher","year":"2023"},{"key":"10.1016\/j.aei.2026.104639_b64","series-title":"YOLOv9: Learning what you want to learn using programmable gradient information","author":"Wang","year":"2024"},{"key":"10.1016\/j.aei.2026.104639_b65","series-title":"Yolov10: Real-time end-to-end object detection","author":"Wang","year":"2024"},{"key":"10.1016\/j.aei.2026.104639_b66","doi-asserted-by":"crossref","unstructured":"Z. Cai, N. Vasconcelos, Cascade r-cnn: Delving into high quality object detection, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 6154\u20136162.","DOI":"10.1109\/CVPR.2018.00644"},{"key":"10.1016\/j.aei.2026.104639_b67","series-title":"Deformable detr: Deformable transformers for end-to-end object detection","author":"Zhu","year":"2020"},{"key":"10.1016\/j.aei.2026.104639_b68","doi-asserted-by":"crossref","unstructured":"P. Sun, R. Zhang, Y. Jiang, T. Kong, C. Xu, W. Zhan, M. Tomizuka, L. Li, Z. Yuan, C. Wang, et al., Sparse r-cnn: End-to-end object detection with learnable proposals, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 14454\u201314463.","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"10.1016\/j.aei.2026.104639_b69","series-title":"Dino: Detr with improved denoising anchor boxes for end-to-end object detection","author":"Zhang","year":"2022"},{"key":"10.1016\/j.aei.2026.104639_b70","article-title":"Hyper-yolo: When visual object detection meets hypergraph computation","author":"Feng","year":"2024","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"6","key":"10.1016\/j.aei.2026.104639_b71","doi-asserted-by":"crossref","DOI":"10.3390\/drones8060240","article-title":"PHSI-RTDETR: A lightweight infrared small target detection algorithm based on UAV aerial photography","volume":"8","author":"Wang","year":"2024","journal-title":"Drones"},{"key":"10.1016\/j.aei.2026.104639_b72","first-page":"1","article-title":"CSFPR-RTDETR: Real-time small object detection network for UAV images based on cross-spatial-frequency domain and position relation","volume":"63","author":"Hu","year":"2025","journal-title":"IEEE Trans. Geosci. Remote Sens."},{"key":"10.1016\/j.aei.2026.104639_b73","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2025.129710","article-title":"Freq-DETR: Frequency-aware transformer for real-time small object detection in unmanned aerial vehicle imagery","volume":"298","author":"Chen","year":"2026","journal-title":"Expert Syst. Appl."},{"issue":"20","key":"10.1016\/j.aei.2026.104639_b74","doi-asserted-by":"crossref","DOI":"10.3390\/rs17203476","article-title":"DFAS-YOLO: Dual feature-aware sampling for small-object detection in remote sensing images","volume":"17","author":"Liu","year":"2025","journal-title":"Remote. Sens."},{"key":"10.1016\/j.aei.2026.104639_b75","series-title":"DEIM: DETR with improved matching for fast convergence","author":"Shihua","year":"2025"},{"key":"10.1016\/j.aei.2026.104639_b76","series-title":"European Conference on Computer Vision","first-page":"78","article-title":"MobileNetV4: Universal models for the mobile ecosystem","author":"Qin","year":"2025"},{"key":"10.1016\/j.aei.2026.104639_b77","doi-asserted-by":"crossref","unstructured":"X. Liu, H. Peng, N. Zheng, Y. Yang, H. Hu, Y. Yuan, Efficientvit: Memory efficient vision transformer with cascaded group attention, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 14420\u201314430.","DOI":"10.1109\/CVPR52729.2023.01386"},{"key":"10.1016\/j.aei.2026.104639_b78","doi-asserted-by":"crossref","unstructured":"N. Ma, X. Zhang, H.-T. Zheng, J. Sun, Shufflenet v2: Practical guidelines for efficient cnn architecture design, in: Proceedings of the European Conference on Computer Vision, ECCV, 2018, pp. 116\u2013131.","DOI":"10.1007\/978-3-030-01264-9_8"},{"key":"10.1016\/j.aei.2026.104639_b79","series-title":"ICASSP 2023-2023 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"1","article-title":"Efficient multi-scale attention module with cross-spatial learning","author":"Ouyang","year":"2023"},{"key":"10.1016\/j.aei.2026.104639_b80","doi-asserted-by":"crossref","unstructured":"J. Hu, L. Shen, G. Sun, Squeeze-and-excitation networks, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2018, pp. 7132\u20137141.","DOI":"10.1109\/CVPR.2018.00745"},{"key":"10.1016\/j.aei.2026.104639_b81","doi-asserted-by":"crossref","unstructured":"S. Woo, J. Park, J.-Y. Lee, I.S. Kweon, Cbam: Convolutional block attention module, in: Proceedings of the European Conference on Computer Vision, ECCV, 2018, pp. 3\u201319.","DOI":"10.1007\/978-3-030-01234-2_1"},{"issue":"3","key":"10.1016\/j.aei.2026.104639_b82","doi-asserted-by":"crossref","first-page":"62","DOI":"10.1007\/s11554-024-01436-6","article-title":"Slim-neck by GSConv: A lightweight-design for real-time detector architectures","volume":"21","author":"Li","year":"2024","journal-title":"J. Real-Time Image Process."},{"key":"10.1016\/j.aei.2026.104639_b83","doi-asserted-by":"crossref","unstructured":"Y. Chen, X. Dai, M. Liu, D. Chen, L. Yuan, Z. Liu, Dynamic convolution: Attention over convolution kernels, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2020, pp. 11030\u201311039.","DOI":"10.1109\/CVPR42600.2020.01104"},{"key":"10.1016\/j.aei.2026.104639_b84","series-title":"RFAConv: Innovating spatial attention and standard convolutional operation","author":"Zhang","year":"2023"},{"key":"10.1016\/j.aei.2026.104639_b85","series-title":"AKConv: Convolutional kernel with arbitrary sampled shapes and arbitrary number of parameters","author":"Zhang","year":"2023"},{"key":"10.1016\/j.aei.2026.104639_b86","article-title":"Hybrid convolutional and attention network for hyperspectral image denoising","author":"Hu","year":"2024","journal-title":"IEEE Geosci. Remote. Sens. Lett."},{"key":"10.1016\/j.aei.2026.104639_b87","series-title":"Joint European Conference on Machine Learning and Knowledge Discovery in Databases","first-page":"443","article-title":"No more strided convolutions or pooling: A new CNN building block for low-resolution images and small objects","author":"Sunkara","year":"2022"},{"key":"10.1016\/j.aei.2026.104639_b88","doi-asserted-by":"crossref","unstructured":"X. Cai, Q. Lai, Y. Wang, W. Wang, Z. Sun, Y. Yao, Poly kernel inception network for remote sensing detection, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 27706\u201327716.","DOI":"10.1109\/CVPR52733.2024.02617"},{"key":"10.1016\/j.aei.2026.104639_b89","doi-asserted-by":"crossref","unstructured":"W. Lin, Z. Wu, J. Chen, J. Huang, L. Jin, Scale-Aware Modulation Meet Transformer, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, ICCV, 2023, pp. 6015\u20136026.","DOI":"10.1109\/ICCV51070.2023.00553"},{"key":"10.1016\/j.aei.2026.104639_b90","doi-asserted-by":"crossref","unstructured":"X. Zhu, H. Hu, S. Lin, J. Dai, Deformable convnets v2: More deformable, better results, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2019, pp. 9308\u20139316.","DOI":"10.1109\/CVPR.2019.00953"},{"issue":"4","key":"10.1016\/j.aei.2026.104639_b91","article-title":"Fcos: A simple and strong anchor-free object detector","volume":"44","author":"Detector","year":"2022","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"10.1016\/j.aei.2026.104639_b92","doi-asserted-by":"crossref","unstructured":"R.R. Selvaraju, M. Cogswell, A. Das, R. Vedantam, D. Parikh, D. Batra, Grad-cam: Visual explanations from deep networks via gradient-based localization, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 618\u2013626.","DOI":"10.1109\/ICCV.2017.74"}],"container-title":["Advanced Engineering Informatics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1474034626003319?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S1474034626003319?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,14]],"date-time":"2026-04-14T19:06:33Z","timestamp":1776193593000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S1474034626003319"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":92,"alternative-id":["S1474034626003319"],"URL":"https:\/\/doi.org\/10.1016\/j.aei.2026.104639","relation":{},"ISSN":["1474-0346"],"issn-type":[{"value":"1474-0346","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"IPeDet: An end-to-end fine-grained feature aggregation network for UAV infrared pedestrian detection","name":"articletitle","label":"Article Title"},{"value":"Advanced Engineering Informatics","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.aei.2026.104639","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"104639"}}