{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,1]],"date-time":"2026-03-01T06:02:47Z","timestamp":1772344967910,"version":"3.50.1"},"reference-count":45,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2024,4,1]],"date-time":"2024-04-01T00:00:00Z","timestamp":1711929600000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100005230","name":"Natural Science Foundation of Chongqing Municipality","doi-asserted-by":"publisher","award":["CSTB2022NSCQ-MSX1200"],"award-info":[{"award-number":["CSTB2022NSCQ-MSX1200"]}],"id":[{"id":"10.13039\/501100005230","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007957","name":"Chongqing Municipal Education Commission","doi-asserted-by":"publisher","award":["KJQN202200537"],"award-info":[{"award-number":["KJQN202200537"]}],"id":[{"id":"10.13039\/501100007957","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100007957","name":"Chongqing Municipal Education Commission","doi-asserted-by":"publisher","award":["KJZD- M202300502"],"award-info":[{"award-number":["KJZD- M202300502"]}],"id":[{"id":"10.13039\/501100007957","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100012669","name":"Natural Science Foundation Project of Chongqing","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100012669","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/100010338","name":"Chongqing Normal University","doi-asserted-by":"publisher","award":["21XLB035"],"award-info":[{"award-number":["21XLB035"]}],"id":[{"id":"10.13039\/100010338","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Image and Vision Computing"],"published-print":{"date-parts":[[2024,4]]},"DOI":"10.1016\/j.imavis.2024.104966","type":"journal-article","created":{"date-parts":[[2024,2,28]],"date-time":"2024-02-28T11:32:50Z","timestamp":1709119970000},"page":"104966","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":16,"special_numbering":"C","title":["Improved YOLOv7 models based on modulated deformable convolution and swin transformer for object detection in fisheye images"],"prefix":"10.1016","volume":"144","author":[{"given":"Jie","family":"Zhou","sequence":"first","affiliation":[]},{"given":"Degang","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Tingting","family":"Song","sequence":"additional","affiliation":[]},{"given":"Yichen","family":"Ye","sequence":"additional","affiliation":[]},{"given":"Xin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Yingze","family":"Song","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.imavis.2024.104966_bb0005","doi-asserted-by":"crossref","DOI":"10.1016\/j.cviu.2020.102968","article-title":"Intelligent video analysis: a pedestrian trajectory extraction method for the whole indoor space without blind areas","volume":"196","author":"Yang","year":"2020","journal-title":"Comput. Vis. Image Underst."},{"issue":"12","key":"10.1016\/j.imavis.2024.104966_bb0010","doi-asserted-by":"crossref","first-page":"1585","DOI":"10.1111\/mice.12686","article-title":"Autonomous detection of damage to multiple steel surfaces from 360 panoramas using deep neural networks","volume":"36","author":"Luo","year":"2021","journal-title":"Comput.-Aided Civ. Inf."},{"issue":"19","key":"10.1016\/j.imavis.2024.104966_bb0015","doi-asserted-by":"crossref","first-page":"3177","DOI":"10.3390\/rs12193177","article-title":"Early fire detection based on aerial 360-degree sensors, deep convolution neural networks and exploitation of fire dynamic textures","volume":"12","author":"Barmpoutis","year":"2020","journal-title":"Remote Sens."},{"issue":"6","key":"10.1016\/j.imavis.2024.104966_bb0020","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3414685.3417770","article-title":"Omniphotos: casual 360 vr photography","volume":"39","author":"Bertel","year":"2020","journal-title":"ACM Trans. Graph."},{"issue":"1","key":"10.1016\/j.imavis.2024.104966_bb0025","doi-asserted-by":"crossref","first-page":"118","DOI":"10.1109\/JSTSP.2019.2957952","article-title":"Video coding optimization for virtual reality 360-degree source","volume":"14","author":"Zhou","year":"2019","journal-title":"IEEE J. Sel. Topics Signal Proc."},{"issue":"2","key":"10.1016\/j.imavis.2024.104966_bb0030","doi-asserted-by":"crossref","first-page":"2830","DOI":"10.1109\/LRA.2021.3062324","article-title":"Omnidet: surround view cameras based multi-task visual perception network for autonomous driving","volume":"6","author":"Kumar","year":"2021","journal-title":"IEEE Robot. Autom. Lett."},{"key":"10.1016\/j.imavis.2024.104966_bb0035","series-title":"2019 International Conference on Robotics and Automation (ICRA)","first-page":"6087","article-title":"Real-time dense mapping for self-driving vehicles using fisheye cameras","author":"Cui","year":"2019"},{"key":"10.1016\/j.imavis.2024.104966_bb0040","doi-asserted-by":"crossref","first-page":"14","DOI":"10.1016\/j.imavis.2017.07.003","article-title":"3d visual perception for self-driving cars using a multi-camera system: calibration, mapping, localization, and obstacle detection","volume":"68","author":"H\u00e4ne","year":"2017","journal-title":"Image Vis. Comput."},{"issue":"4","key":"10.1016\/j.imavis.2024.104966_bb0045","doi-asserted-by":"crossref","first-page":"497","DOI":"10.1002\/rob.21946","article-title":"Autonomous aerial robot using dual-fisheye cameras","volume":"37","author":"Gao","year":"2020","journal-title":"J. Field Robot."},{"issue":"3","key":"10.1016\/j.imavis.2024.104966_bb0050","first-page":"4241","article-title":"Silhonet-fisheye: adaptation of a roi based object pose estimation network to monocular fisheye images","volume":"5","author":"Billings","year":"2020","journal-title":"IEEE Robot. Autom. Lett."},{"issue":"2","key":"10.1016\/j.imavis.2024.104966_bb0055","doi-asserted-by":"crossref","first-page":"1303","DOI":"10.1109\/LRA.2020.2967657","article-title":"Variational fisheye stereo","volume":"5","author":"Roxas","year":"2020","journal-title":"IEEE Robot. Autom. Lett."},{"issue":"9","key":"10.1016\/j.imavis.2024.104966_bb0060","doi-asserted-by":"crossref","first-page":"1037","DOI":"10.1177\/0278364920915248","article-title":"Panoramis: an ultra-wide field of view image dataset for vision-based robot-motion estimation","volume":"39","author":"Benseddik","year":"2020","journal-title":"Int. J. Robot. Res."},{"key":"10.1016\/j.imavis.2024.104966_bb0065","doi-asserted-by":"crossref","DOI":"10.1108\/EC-09-2018-0431","article-title":"Analysis for center deviation of circular target under perspective projection","author":"Sun","year":"2019","journal-title":"Eng. Comput."},{"key":"10.1016\/j.imavis.2024.104966_bb0070","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3363","article-title":"Dula-net: a dual-projection network for estimating room layouts from a single rgb panorama","author":"Yang","year":"2019"},{"key":"10.1016\/j.imavis.2024.104966_bb0075","doi-asserted-by":"crossref","first-page":"217","DOI":"10.1007\/s11263-011-0505-4","article-title":"Scale invariant feature transform on the sphere: theory and applications","volume":"98","author":"Cruz-Mota","year":"2012","journal-title":"Int. J. Comput. Vis."},{"issue":"12","key":"10.1016\/j.imavis.2024.104966_bb0080","doi-asserted-by":"crossref","first-page":"840","DOI":"10.1016\/j.imavis.2011.09.007","article-title":"Central catadioptric image processing with geodesic metric","volume":"29","author":"Demonceaux","year":"2011","journal-title":"Image Vis. Comput."},{"issue":"2","key":"10.1016\/j.imavis.2024.104966_bb0085","doi-asserted-by":"crossref","first-page":"185","DOI":"10.3233\/ICA-160511","article-title":"Geodesically-corrected zernike descriptors for pose recognition in omni-directional images","volume":"23","author":"Delibasis","year":"2016","journal-title":"Integr. Comput.-Aided Eng."},{"issue":"6","key":"10.1016\/j.imavis.2024.104966_bb0090","doi-asserted-by":"crossref","first-page":"73","DOI":"10.3390\/jimaging4060073","article-title":"Efficient implementation of gaussian and laplacian kernels for feature extraction from ip fisheye cameras","volume":"4","author":"Delibasis","year":"2018","journal-title":"J. Imaging"},{"key":"10.1016\/j.imavis.2024.104966_bb0095","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2021.104264","article-title":"Omnidirectional stereo depth estimation based on spherical deep network","volume":"114","author":"Li","year":"2021","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2024.104966_bb0100","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2022.104590","article-title":"Exploring viewport features for semi-supervised saliency prediction in omnidirectional images","volume":"129","author":"Huang","year":"2023","journal-title":"Image Vis. Comput."},{"key":"10.1016\/j.imavis.2024.104966_bb0105","first-page":"529","article-title":"Learning spherical convolution for fast features from 360 imagery","volume":"30","author":"Su","year":"2017","journal-title":"Proc. Adv. Neural Inf. Proc. Syst."},{"key":"10.1016\/j.imavis.2024.104966_bb0110","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9442","article-title":"Kernel transformer networks for compact spherical convolution","author":"Su","year":"2019"},{"key":"10.1016\/j.imavis.2024.104966_bb0115","series-title":"Proceedings of the European conference on computer vision (ECCV)","first-page":"525","article-title":"Spherenet: Learning spherical representations for detection and classification in omnidirectional images","author":"Coors","year":"2018"},{"key":"10.1016\/j.imavis.2024.104966_bb0120","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9181","article-title":"SpherePHD: applying cnns on a spherical polyhedron representation of 360deg images","author":"Lee","year":"2019"},{"key":"10.1016\/j.imavis.2024.104966_bb0125","doi-asserted-by":"crossref","DOI":"10.1016\/j.imavis.2020.104069","article-title":"Efficient pedestrian detection in top-view fisheye images using compositions of perspective view patches","volume":"105","author":"Chiang","year":"2021","journal-title":"Image Vis. Comput."},{"issue":"5","key":"10.1016\/j.imavis.2024.104966_bb0130","doi-asserted-by":"crossref","first-page":"2403","DOI":"10.3390\/app12052403","article-title":"Expandable spherical projection and feature concatenation methods for real-time road object detection using fisheye image","volume":"12","author":"Kim","year":"2022","journal-title":"Appl. Sci."},{"key":"10.1016\/j.imavis.2024.104966_bb0135","series-title":"2019 IEEE International Conference on Image Processing (ICIP)","first-page":"2956","article-title":"Smaller object detection for real-time embedded traffic flow estimation using fish-eye cameras","author":"Chen","year":"2019"},{"key":"10.1016\/j.imavis.2024.104966_bb0140","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops","first-page":"2373","article-title":"Rotinvmtl: rotation invariant multinet on fisheye images for autonomous driving applications","author":"Arsenali","year":"2019"},{"key":"10.1016\/j.imavis.2024.104966_bb0145","series-title":"Yolov7: Trainable bag-of-freebies sets new state-of-the-art for real-time object detectors, arXiv preprint","author":"Wang","year":"2022"},{"key":"10.1016\/j.imavis.2024.104966_bb0150","series-title":"In: 2020 International Conference on Pervasive Artificial Intelligence (ICPAI)","first-page":"194","article-title":"360 degree fish eye optical construction for equirectangular projection of panoramic images","author":"Lin","year":"2020"},{"key":"10.1016\/j.imavis.2024.104966_bb0155","doi-asserted-by":"crossref","DOI":"10.1016\/j.dib.2019.104752","article-title":"Datasets for face and object detection in fisheye images","volume":"27","author":"Fu","year":"2019","journal-title":"Data Brief"},{"key":"10.1016\/j.imavis.2024.104966_bb0160","series-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition","first-page":"779","article-title":"You only look once: unified, real-time object detection","author":"Redmon","year":"2016"},{"key":"10.1016\/j.imavis.2024.104966_bb0165","first-page":"7263","article-title":"Yolo9000: better, faster, stronger","author":"Redmon","year":"2017","journal-title":"Proc. IEEE Conf. Comput. Vis. Pattern Recognit."},{"key":"10.1016\/j.imavis.2024.104966_bb0170","series-title":"Yolov3: An incremental improvement, arXiv preprint","author":"Redmon","year":"2018"},{"key":"10.1016\/j.imavis.2024.104966_bb0175","series-title":"Yolov4: Optimal speed and accuracy of object detection, arXiv preprint","author":"Bochkovskiy","year":"2020"},{"key":"10.1016\/j.imavis.2024.104966_bb0180","unstructured":"J. Glenn, Yolov5, https:\/\/github.com\/ultralytics\/yolov5, accessed: October 9, 2022."},{"key":"10.1016\/j.imavis.2024.104966_bb0185","author":"Ge"},{"key":"10.1016\/j.imavis.2024.104966_bb0190","author":"Li"},{"key":"10.1016\/j.imavis.2024.104966_bb0195","unstructured":"J. Glenn, YOLOv8, https:\/\/github.com\/ultralytics\/ultralytics, accessed: February 14, 2023."},{"key":"10.1016\/j.imavis.2024.104966_bb0200","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"9308","article-title":"Deformable convnets v2: More deformable, better results","author":"Zhu","year":"2019"},{"key":"10.1016\/j.imavis.2024.104966_bb0205","series-title":"Proceedings of the IEEE International Conference on Computer Vision","first-page":"764","article-title":"Deformable convolutional networks","author":"Dai","year":"2017"},{"key":"10.1016\/j.imavis.2024.104966_bb0210","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"10012","article-title":"Swin transformer: hierarchical vision transformer using shifted windows","author":"Liu","year":"2021"},{"issue":"7","key":"10.1016\/j.imavis.2024.104966_bb0215","doi-asserted-by":"crossref","first-page":"3634","DOI":"10.3390\/s23073634","article-title":"Swin-transformer-based yolov5 for small-object detection in remote sensing images","volume":"23","author":"Cao","year":"2023","journal-title":"Sensors"},{"key":"10.1016\/j.imavis.2024.104966_bb0220","series-title":"In: 2023 18th International Conference on Machine Vision and Applications (MVA)","first-page":"1","article-title":"Small object detection for birds with swin transformer","author":"Huo","year":"2023"},{"issue":"23","key":"10.1016\/j.imavis.2024.104966_bb0225","doi-asserted-by":"crossref","first-page":"12398","DOI":"10.3390\/app122312398","article-title":"Classification and object detection of 360\u00b0 omnidirectional images based on continuity-distortion processing and attention mechanism","volume":"12","author":"Zhang","year":"2022","journal-title":"Appl. Sci."}],"container-title":["Image and Vision Computing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885624000702?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0262885624000702?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2025,10,2]],"date-time":"2025-10-02T01:39:40Z","timestamp":1759369180000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0262885624000702"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4]]},"references-count":45,"alternative-id":["S0262885624000702"],"URL":"https:\/\/doi.org\/10.1016\/j.imavis.2024.104966","relation":{},"ISSN":["0262-8856"],"issn-type":[{"value":"0262-8856","type":"print"}],"subject":[],"published":{"date-parts":[[2024,4]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Improved YOLOv7 models based on modulated deformable convolution and swin transformer for object detection in fisheye images","name":"articletitle","label":"Article Title"},{"value":"Image and Vision Computing","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.imavis.2024.104966","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2024 Elsevier B.V. All rights reserved.","name":"copyright","label":"Copyright"}],"article-number":"104966"}}