{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,5,19]],"date-time":"2026-05-19T14:50:05Z","timestamp":1779202205559,"version":"3.51.4"},"reference-count":213,"publisher":"Springer Science and Business Media LLC","issue":"24","license":[{"start":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T00:00:00Z","timestamp":1705017600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T00:00:00Z","timestamp":1705017600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52274160"],"award-info":[{"award-number":["52274160"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["52074305"],"award-info":[{"award-number":["52074305"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["51874300"],"award-info":[{"award-number":["51874300"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100019488","name":"National Natural Science Foundation of China-Shanxi Joint Fund for Coal-Based Low-Carbon Technology","doi-asserted-by":"publisher","award":["U1510115"],"award-info":[{"award-number":["U1510115"]}],"id":[{"id":"10.13039\/501100019488","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-023-17949-4","type":"journal-article","created":{"date-parts":[[2024,1,12]],"date-time":"2024-01-12T06:01:51Z","timestamp":1705039311000},"page":"65603-65661","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":68,"title":["A review of object detection: Datasets, performance evaluation, architecture, applications and current trends"],"prefix":"10.1007","volume":"83","author":[{"given":"Wei","family":"Chen","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jinjin","family":"Luo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Fan","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zijian","family":"Tian","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,1,12]]},"reference":[{"key":"17949_CR1","doi-asserted-by":"publisher","first-page":"297","DOI":"10.1007\/978-3-319-10584-0_20","volume":"8695","author":"B Hariharan","year":"2014","unstructured":"Hariharan B, Arbelaez P, Girshick R, Malik J (2014) Simultaneous detection and segmentation. Computer Vision - ECCV 8695:297\u2013312. https:\/\/doi.org\/10.1007\/978-3-319-10584-0_20","journal-title":"Computer Vision - ECCV"},{"key":"17949_CR2","doi-asserted-by":"publisher","first-page":"3150","DOI":"10.1109\/CVPR.2016.343","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"J Dai","year":"2016","unstructured":"Dai J, He K, Sun J (2016) Instance-aware semantic segmentation via multi-task network cascades. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3150\u20133158. https:\/\/doi.org\/10.1109\/CVPR.2016.343"},{"key":"17949_CR3","doi-asserted-by":"publisher","first-page":"447","DOI":"10.1109\/CVPR.2015.7298642","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"B Hariharan","year":"2015","unstructured":"Hariharan B, Arbelaez P, Girshick R, Malik J (2015) Hypercolumns for object segmentation and fine-grained localization. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 447\u2013456. https:\/\/doi.org\/10.1109\/CVPR.2015.7298642"},{"issue":"10","key":"17949_CR4","doi-asserted-by":"publisher","first-page":"2896","DOI":"10.1109\/TCSVT.2017.2736553","volume":"28","author":"K Kang","year":"2018","unstructured":"Kang K, Li H, Yan J, Zeng X, Yang B, Xiao T, Zhang C, Wang Z, Wang R, Wang X, Ouyang W (2018) T-CNN: Tubelets with convolutional neural networks for object detection from videos. IEEE Transactions on Circuits and Systems for Video Technology 28(10):2896\u20132907. https:\/\/doi.org\/10.1109\/TCSVT.2017.2736553","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"issue":"5","key":"17949_CR5","doi-asserted-by":"publisher","first-page":"859","DOI":"10.23919\/JSEE.2020.000066","volume":"31","author":"T Chen","year":"2020","unstructured":"Chen T, Yang P, Peng H, Qian Z (2020) Multi-target tracking algorithm based on PHD filter against multi-range-false-target jamming. J Syst Eng Electron 31(5):859\u2013870. https:\/\/doi.org\/10.23919\/JSEE.2020.000066","journal-title":"J Syst Eng Electron"},{"key":"17949_CR6","doi-asserted-by":"publisher","first-page":"3128","DOI":"10.1109\/CVPR.2015.7298932","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"A Karpathy","year":"2015","unstructured":"Karpathy A, Fei-Fei L (2015) Deep visual-semantic alignments for generating image descriptions. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3128\u20133137. https:\/\/doi.org\/10.1109\/CVPR.2015.7298932"},{"issue":"6","key":"17949_CR7","doi-asserted-by":"publisher","first-page":"1367","DOI":"10.1109\/TPAMI.2017.2708709","volume":"40","author":"Q Wu","year":"2018","unstructured":"Wu Q, Shen C, Wang P, Dick A, Avd H (2018) Image captioning and visual question answering based on attributes and external knowledge. IEEE Trans Pattern Anal Mach Intell 40(6):1367\u20131381. https:\/\/doi.org\/10.1109\/TPAMI.2017.2708709","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17949_CR8","first-page":"2048","volume":"37","author":"K Xu","year":"2015","unstructured":"Xu K, Ba JL, Kiros R, Cho K, Courville A, Salakhutdinov R, Zemel RS, Bengio Y (2015) Show, attend and tell: neural image caption generation with visual attention. International Conference on Machine Learning 37:2048\u20132057 https:\/\/dl.acm.org\/doi\/10.5555\/3045118.3045336","journal-title":"International Conference on Machine Learning"},{"key":"17949_CR9","doi-asserted-by":"publisher","first-page":"443","DOI":"10.1007\/978-3-319-46475-6_28","volume":"2016","author":"L Zhang","year":"2016","unstructured":"Zhang L, Lin L, Liang X, He K (2016) Is faster R-CNN doing well for pedestrian detection? Computer Vision - ECCV 2016:443\u2013457. https:\/\/doi.org\/10.1007\/978-3-319-46475-6_28","journal-title":"Computer Vision - ECCV"},{"key":"17949_CR10","doi-asserted-by":"publisher","first-page":"2056","DOI":"10.1109\/ICCV.2013.257","volume-title":"Proceedings of the IEEE international conference on computer vision (ICCV)","author":"W Ouyang","year":"2013","unstructured":"Ouyang W, Wang X (2013) Joint deep learning for pedestrian detection. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 2056\u20132063. https:\/\/doi.org\/10.1109\/ICCV.2013.257"},{"key":"17949_CR11","doi-asserted-by":"publisher","first-page":"1701","DOI":"10.1109\/CVPR.2014.220","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"Y Taigman","year":"2014","unstructured":"Taigman Y, Yang M, Ranzato MA, Wolf L (2014) DeepFace: closing the gap to human-level performance in face verification. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1701\u20131708. https:\/\/doi.org\/10.1109\/CVPR.2014.220"},{"key":"17949_CR12","doi-asserted-by":"publisher","first-page":"4159","DOI":"10.1109\/CVPR.2016.451","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"Z Zhang","year":"2016","unstructured":"Zhang Z, Zhang C, Shen W, Yao C, Liu W, Bai X (2016) Multi-oriented text detection with fully convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4159\u20134167. https:\/\/doi.org\/10.1109\/CVPR.2016.451"},{"key":"17949_CR13","doi-asserted-by":"publisher","first-page":"3296","DOI":"10.1109\/CVPR.2017.351","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"J Huang","year":"2017","unstructured":"Huang J, Rathod V, Sun C, Zhu M, Korattikara A, Fathi A, Fischer I, Wojna Z, Song Y, Guadarrama S, Murphy K (2017) Speed\/accuracy trade-offs for modern convolutional object detectors. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3296\u20133297. https:\/\/doi.org\/10.1109\/CVPR.2017.351"},{"key":"17949_CR14","doi-asserted-by":"publisher","first-page":"124","DOI":"10.1109\/IVS.2016.7535375","volume-title":"IEEE intelligent vehicles symposium (IV)","author":"Q Fan","year":"2016","unstructured":"Fan Q, Brown L, Smith J (2016) A closer look at faster R-CNN for vehicle detection. In: IEEE intelligent vehicles symposium (IV), pp 124\u2013129. https:\/\/doi.org\/10.1109\/IVS.2016.7535375"},{"key":"17949_CR15","doi-asserted-by":"publisher","first-page":"181","DOI":"10.1109\/ACPR.2013.33","volume-title":"Asian conference on pattern recognition (ACPR)","author":"X Chen","year":"2013","unstructured":"Chen X, Xiang S, Liu C-L, Pan C-H (2013) Vehicle detection in satellite images by parallel deep convolutional neural networks. In: Asian conference on pattern recognition (ACPR), pp 181\u2013185. https:\/\/doi.org\/10.1109\/ACPR.2013.33"},{"issue":"3","key":"17949_CR16","doi-asserted-by":"publisher","first-page":"257","DOI":"10.1109\/JPROC.2023.3238524","volume":"111","author":"Z Zou","year":"2023","unstructured":"Zou Z, Chen K, Shi Z, Guo Y, Ye J (2023) Object detection in 20 years: a survey. Proc IEEE 111(3):257\u2013276. https:\/\/doi.org\/10.1109\/JPROC.2023.3238524","journal-title":"Proc IEEE"},{"key":"17949_CR17","doi-asserted-by":"publisher","first-page":"6526","DOI":"10.1109\/CVPR.2017.691","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"X Chen","year":"2017","unstructured":"Chen X, Ma H, Wan J, Li B, Xia T (2017) Multi-view 3D object detection network for autonomous driving. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6526\u20136534. https:\/\/doi.org\/10.1109\/CVPR.2017.691"},{"issue":"3","key":"17949_CR18","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1145\/1922649.1922653","volume":"43","author":"JK Aggarwal","year":"2011","unstructured":"Aggarwal JK, Ryoo MS (2011) Human activity analysis: a review. ACM Comput Surv 43(3):1\u201343. https:\/\/doi.org\/10.1145\/1922649.1922653","journal-title":"ACM Comput Surv"},{"key":"17949_CR19","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/ICCE-Berlin.2018.8576190","volume-title":"Proceedings of the IEEE 8th international conference on consumer electronics \u2013 Berlin (ICCE-Berlin)","author":"P Viswanath","year":"2018","unstructured":"Viswanath P, Nagori S, Mody M, Mathew M, Swami P (2018) End to end learning based self-driving using JacintoNet. In: Proceedings of the IEEE 8th international conference on consumer electronics \u2013 Berlin (ICCE-Berlin), pp 1\u20134. https:\/\/doi.org\/10.1109\/ICCE-Berlin.2018.8576190"},{"issue":"2","key":"17949_CR20","doi-asserted-by":"publisher","first-page":"197","DOI":"10.1109\/MITS.2021.3049404","volume":"14","author":"L Lin","year":"2022","unstructured":"Lin L, Li W, Bi H, Qin L (2022) Vehicle trajectory prediction using LSTMs with spatial-temporal attention mechanisms. IEEE Intell Transp Syst Mag 14(2):197\u2013208. https:\/\/doi.org\/10.1109\/MITS.2021.3049404","journal-title":"IEEE Intell Transp Syst Mag"},{"issue":"13","key":"17949_CR21","doi-asserted-by":"publisher","first-page":"1473","DOI":"10.1163\/156855307782148578","volume":"21","author":"V Krueger","year":"2007","unstructured":"Krueger V, Kragic D, Ude A, Geib C (2007) The meaning of action: a review on action recognition and mapping. Adv Robot 21(13):1473\u20131501. https:\/\/doi.org\/10.1163\/156855307782148578","journal-title":"Adv Robot"},{"issue":"1","key":"17949_CR22","doi-asserted-by":"publisher","first-page":"4","DOI":"10.1007\/s13721-019-0209-1","volume":"9","author":"R Javed","year":"2019","unstructured":"Javed R, Rahim MSM, Saba T, Rehman A (2019) A comparative study of features selection for skin lesion detection from dermoscopic images. Network Modeling Analysis in Health Informatics and Bioinformatics 9(1):4. https:\/\/doi.org\/10.1007\/s13721-019-0209-1","journal-title":"Network Modeling Analysis in Health Informatics and Bioinformatics"},{"issue":"4","key":"17949_CR23","doi-asserted-by":"publisher","first-page":"820","DOI":"10.1109\/TIM.2007.913703","volume":"57","author":"M Palmese","year":"2008","unstructured":"Palmese M, Trucco A (2008) From 3-D sonar images to augmented reality models for objects buried on the seafloor. IEEE Trans Instrum Meas 57(4):820\u2013828. https:\/\/doi.org\/10.1109\/TIM.2007.913703","journal-title":"IEEE Trans Instrum Meas"},{"key":"17949_CR24","doi-asserted-by":"publisher","first-page":"2785","DOI":"10.1109\/CVPR46437.2021.00281","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"J Han","year":"2021","unstructured":"Han J, Ding J, Xue N, Xia G-S (2021) ReDet: a rotation-equivariant detector for aerial object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 2785\u20132794. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00281"},{"issue":"2","key":"17949_CR25","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1023\/B:VISI.0000013087.49260.fb","volume":"57","author":"P Viola","year":"2004","unstructured":"Viola P, Jones MJ (2004) Robust real-time face detection. Int J Comput Vis 57(2):137\u2013154. https:\/\/doi.org\/10.1023\/B:VISI.0000013087.49260.fb","journal-title":"Int J Comput Vis"},{"key":"17949_CR26","doi-asserted-by":"publisher","first-page":"886","DOI":"10.1109\/CVPR.2005.177","volume-title":"Proceedings of the IEEE computer society conference on computer vision and pattern recognition (CVPR'05)","author":"N Dalal","year":"2005","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition (CVPR'05), pp 886\u2013893. https:\/\/doi.org\/10.1109\/CVPR.2005.177"},{"key":"17949_CR27","doi-asserted-by":"publisher","first-page":"2241","DOI":"10.1109\/CVPR.2010.5539906","volume-title":"Proceedings of the IEEE computer society conference on computer vision and pattern recognition (CVPR)","author":"PF Felzenszwalb","year":"2010","unstructured":"Felzenszwalb PF, Girshick RB, McAllester D (2010) Cascade object detection with deformable part models. In: Proceedings of the IEEE computer society conference on computer vision and pattern recognition (CVPR), pp 2241\u20132248. https:\/\/doi.org\/10.1109\/CVPR.2010.5539906"},{"key":"17949_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/CVPR.2008.4587597","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"P Felzenszwalb","year":"2008","unstructured":"Felzenszwalb P, McAllester D, Ramanan D (2008) A discriminatively trained, multiscale, deformable part model. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1\u20138. https:\/\/doi.org\/10.1109\/CVPR.2008.4587597"},{"issue":"9","key":"17949_CR29","doi-asserted-by":"publisher","first-page":"1627","DOI":"10.1109\/TPAMI.2009.167","volume":"32","author":"PF Felzenszwalb","year":"2010","unstructured":"Felzenszwalb PF, Girshick RB, McAllester D, Ramanan D (2010) Object detection with discriminatively trained part-based models. IEEE Trans Pattern Anal Mach Intell 32(9):1627\u20131645. https:\/\/doi.org\/10.1109\/TPAMI.2009.167","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17949_CR30","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/ICIP.2002.1038171","volume-title":"Proceedings of the international conference on image processing (ICIP)","author":"R Lienhart","year":"2002","unstructured":"Lienhart R, Maydt J (2002) An extended set of Haar-like features for rapid object detection. In: Proceedings of the international conference on image processing (ICIP), vol 1, pp 1\u20131. https:\/\/doi.org\/10.1109\/ICIP.2002.1038171"},{"issue":"2","key":"17949_CR31","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe DG (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis 60(2):91\u2013110. https:\/\/doi.org\/10.1023\/B:VISI.0000029664.99615.94","journal-title":"Int J Comput Vis"},{"issue":"4","key":"17949_CR32","doi-asserted-by":"publisher","first-page":"917","DOI":"10.1177\/1536867X1601600407","volume":"16","author":"N Guenther","year":"2016","unstructured":"Guenther N, Schonlau M (2016) Support vector machines Stata Journal 16(4):917\u2013937. https:\/\/doi.org\/10.1177\/1536867X1601600407","journal-title":"Support vector machines Stata Journal"},{"key":"17949_CR33","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/CVPR.2001.990517","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"P Viola","year":"2001","unstructured":"Viola P, Jones M (2001) Rapid object detection using a boosted cascade of simple features. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), p 1. https:\/\/doi.org\/10.1109\/CVPR.2001.990517"},{"issue":"2","key":"17949_CR34","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CKI, Winn J, Zisserman A (2010) The Pascal visual object classes (VOC) challenge. Int J Comput Vis 88(2):303\u2013338. https:\/\/doi.org\/10.1007\/s11263-009-0275-4","journal-title":"Int J Comput Vis"},{"key":"17949_CR35","volume-title":"Application of convolutional neural network for image classification on Pascal VOC challenge 2012 dataset","author":"S Shetty","year":"2016","unstructured":"Shetty S (2016) Application of convolutional neural network for image classification on Pascal VOC challenge 2012 dataset. Arxiv. https:\/\/arxiv.org\/abs\/1607.03785"},{"key":"17949_CR36","doi-asserted-by":"publisher","first-page":"740","DOI":"10.1007\/978-3-319-10602-1_48","volume":"8693","author":"T-Y Lin","year":"2014","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Dollar P, Zitnick CL (2014) Microsoft COCO: common objects in context. Computer Vision - ECCV 8693:740\u2013755. https:\/\/doi.org\/10.1007\/978-3-319-10602-1_48","journal-title":"Computer Vision - ECCV"},{"key":"17949_CR37","doi-asserted-by":"publisher","first-page":"248","DOI":"10.1109\/CVPR.2009.5206848","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"J Deng","year":"2009","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Li F-F (2009) ImageNet: a large-scale hierarchical image database. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 248\u2013255. https:\/\/doi.org\/10.1109\/CVPR.2009.5206848"},{"issue":"3","key":"17949_CR38","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M, Berg AC, Fei-Fei L (2015) ImageNet large scale visual recognition challenge. Int J Comput Vis 115(3):211\u2013252. https:\/\/doi.org\/10.1007\/s11263-015-0816-y","journal-title":"Int J Comput Vis"},{"issue":"7","key":"17949_CR39","doi-asserted-by":"publisher","first-page":"1956","DOI":"10.1007\/s11263-020-01316-z","volume":"128","author":"A Kuznetsova","year":"2020","unstructured":"Kuznetsova A, Rom H, Alldrin N, Uijlings J, Krasin I, Pont-Tuset J, Kamali S, Popov S, Malloci M, Kolesnikov A, Duerig T, Ferrari V (2020) The open images dataset V4. Int J Comput Vis 128(7):1956\u20131981. https:\/\/doi.org\/10.1007\/s11263-020-01316-z","journal-title":"Int J Comput Vis"},{"key":"17949_CR40","doi-asserted-by":"publisher","first-page":"8429","DOI":"10.1109\/ICCV.2019.00852","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"S Shao","year":"2019","unstructured":"Shao S, Li Z, Zhang T, Peng C, Yu G, Zhang X, Li J, Sun J (2019) Objects365: a large-scale, high-quality dataset for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 8429\u20138438. https:\/\/doi.org\/10.1109\/ICCV.2019.00852"},{"key":"17949_CR41","doi-asserted-by":"publisher","first-page":"3974","DOI":"10.1109\/CVPR.2018.00418","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"G-S Xia","year":"2018","unstructured":"Xia G-S, Bai X, Ding J, Zhu Z, Belongie S, Luo J, Datcu M, Pelillo M, Zhang L (2018) DOTA: a large-scale dataset for object detection in aerial images. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 3974\u20133983. https:\/\/doi.org\/10.1109\/CVPR.2018.00418"},{"key":"17949_CR42","doi-asserted-by":"publisher","first-page":"5351","DOI":"10.1109\/CVPR.2019.00550","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"A Gupta","year":"2019","unstructured":"Gupta A, Dollar P, Girshick R (2019) LVIS: a dataset for large vocabulary instance segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 5351\u20135359. https:\/\/doi.org\/10.1109\/CVPR.2019.00550"},{"key":"17949_CR43","doi-asserted-by":"publisher","unstructured":"Wen L, Du D, Cai Z, Lei Z, Chang M-C, Qi H, Lim J, Yang M-H, Lyu S (2020) UA-DETRAC: a new benchmark and protocol for multi-object detection and tracking. Comput Vis Image Underst 193. https:\/\/doi.org\/10.1016\/j.cviu.2020.102907","DOI":"10.1016\/j.cviu.2020.102907"},{"issue":"2","key":"17949_CR44","doi-asserted-by":"publisher","first-page":"380","DOI":"10.1109\/TMM.2019.2929005","volume":"22","author":"S Zhang","year":"2020","unstructured":"Zhang S, Xie Y, Wan J, Xia H, Li SZ, Guo G (2020) WiderPerson: a diverse dataset for dense pedestrian detection in the wild. IEEE Transactions on Multimedia 22(2):380\u2013393. https:\/\/doi.org\/10.1109\/TMM.2019.2929005","journal-title":"IEEE Transactions on Multimedia"},{"key":"17949_CR45","doi-asserted-by":"publisher","unstructured":"Zhang J, Zou X, Kuang L-D, Wang J, Sherratt RS, Yu X (2022) CCTSDB 2021: a more comprehensive traffic sign detection benchmark. Human-centric Computing and Information Sciences 12. https:\/\/doi.org\/10.22967\/HCIS.2022.12.023","DOI":"10.22967\/HCIS.2022.12.023"},{"key":"17949_CR46","doi-asserted-by":"publisher","first-page":"1246","DOI":"10.1109\/WACV45572.2020.9093394","volume-title":"Proceedings of the IEEE winter conference on applications of computer vision (WACV)","author":"X Yu","year":"2020","unstructured":"Yu X, Gong Y, Jiang N, Ye Q, Han Z (2020) Scale match for tiny person detection. In: Proceedings of the IEEE winter conference on applications of computer vision (WACV), pp 1246\u20131254. https:\/\/doi.org\/10.1109\/WACV45572.2020.9093394"},{"key":"17949_CR47","doi-asserted-by":"publisher","first-page":"580","DOI":"10.1109\/CVPR.2014.81","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"R Girshick","year":"2014","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 580\u2013587. https:\/\/doi.org\/10.1109\/CVPR.2014.81"},{"issue":"6","key":"17949_CR48","doi-asserted-by":"publisher","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) ImageNet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390. https:\/\/doi.org\/10.1145\/3065386","journal-title":"Commun ACM"},{"issue":"2","key":"17949_CR49","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JRR Uijlings","year":"2013","unstructured":"Uijlings JRR, van de Sande KEA, Gevers T, Smeulders AWM (2013) Selective search for object recognition. Int J Comput Vis 104(2):154\u2013171. https:\/\/doi.org\/10.1007\/s11263-013-0620-5","journal-title":"Int J Comput Vis"},{"issue":"9","key":"17949_CR50","doi-asserted-by":"publisher","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K He","year":"2015","unstructured":"He K, Zhang X, Ren S, Sun J (2015) Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Trans Pattern Anal Mach Intell 37(9):1904\u20131916. https:\/\/doi.org\/10.1109\/TPAMI.2015.2389824","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17949_CR51","doi-asserted-by":"publisher","first-page":"1440","DOI":"10.1109\/ICCV.2015.169","volume-title":"Proceedings of the IEEE international conference on computer vision (ICCV)","author":"R Girshick","year":"2015","unstructured":"Girshick R (2015) Fast R-CNN. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 1440\u20131448. https:\/\/doi.org\/10.1109\/ICCV.2015.169"},{"key":"17949_CR52","doi-asserted-by":"publisher","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1409.1556","DOI":"10.48550\/arXiv.1409.1556"},{"key":"17949_CR53","doi-asserted-by":"publisher","unstructured":"Gao Y, Liu W, Lombardi F (2020) Design and implementation of an approximate softmax layer for deep neural networks. In: Proceedings of the IEEE international symposium on circuits and systems (ISCAS), pp 1\u20135. https:\/\/doi.org\/10.1109\/ISCAS45731.2020.9180870","DOI":"10.1109\/ISCAS45731.2020.9180870"},{"issue":"6","key":"17949_CR54","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2017","unstructured":"Ren S, He K, Girshick R, Sun J (2017) Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans Pattern Anal Mach Intell 39(6):1137\u20131149. https:\/\/doi.org\/10.1109\/TPAMI.2016.2577031","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"issue":"4","key":"17949_CR55","doi-asserted-by":"publisher","first-page":"640","DOI":"10.1109\/TPAMI.2016.2572683","volume":"39","author":"E Shelhamer","year":"2017","unstructured":"Shelhamer E, Long J, Darrell T (2017) Fully convolutional networks for semantic segmentation. IEEE Trans Pattern Anal Mach Intell 39(4):640\u2013651. https:\/\/doi.org\/10.1109\/TPAMI.2016.2572683","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17949_CR56","doi-asserted-by":"publisher","first-page":"845","DOI":"10.1109\/CVPR.2016.98","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"T Kong","year":"2016","unstructured":"Kong T, Yao A, Chen Y, Sun F (2016) HyperNet: towards accurate region proposal generation and joint object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 845\u2013853. https:\/\/doi.org\/10.1109\/CVPR.2016.98"},{"key":"17949_CR57","doi-asserted-by":"publisher","unstructured":"Dai J, Li Y, He K, Sun J (2016) R-FCN: object detection via region-based fully convolutional networks. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1605.06409","DOI":"10.48550\/arXiv.1605.06409"},{"key":"17949_CR58","doi-asserted-by":"publisher","first-page":"936","DOI":"10.1109\/CVPR.2017.106","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"T-Y Lin","year":"2017","unstructured":"Lin T-Y, Dollar P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 936\u2013944. https:\/\/doi.org\/10.1109\/CVPR.2017.106"},{"key":"17949_CR59","doi-asserted-by":"publisher","first-page":"9300","DOI":"10.1109\/CVPR.2019.00953","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"X Zhu","year":"2019","unstructured":"Zhu X, Hu H, Lin S, Dai J, Soc IC (2019) Deformable ConvNets v2: More deformable, better results. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 9300\u20139308. https:\/\/doi.org\/10.1109\/CVPR.2019.00953"},{"key":"17949_CR60","doi-asserted-by":"publisher","first-page":"764","DOI":"10.1109\/ICCV.2017.89","volume-title":"Proceedings of the IEEE international conference on computer vision (ICCV)","author":"J Dai","year":"2017","unstructured":"Dai J, Qi H, Xiong Y, Li Y, Zhang G, Hu H, Wei Y (2017) Deformable convolutional networks. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 764\u2013773. https:\/\/doi.org\/10.1109\/ICCV.2017.89"},{"key":"17949_CR61","doi-asserted-by":"publisher","first-page":"2980","DOI":"10.1109\/ICCV.2017.322","volume-title":"Proceedings of the IEEE international conference on computer vision (ICCV)","author":"K He","year":"2017","unstructured":"He K, Gkioxari G, Dollar P, Girshick R (2017) Mask R-CNN. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 2980\u20132988. https:\/\/doi.org\/10.1109\/ICCV.2017.322"},{"key":"17949_CR62","doi-asserted-by":"publisher","first-page":"6154","DOI":"10.1109\/CVPR.2018.00644","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"Z Cai","year":"2018","unstructured":"Cai Z, Vasconcelos N (2018) Cascade R-CNN: delving into high quality object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 6154\u20136162. https:\/\/doi.org\/10.1109\/CVPR.2018.00644"},{"key":"17949_CR63","doi-asserted-by":"publisher","first-page":"3578","DOI":"10.1109\/CVPR.2018.00377","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"B Singh","year":"2018","unstructured":"Singh B, Davis LS (2018) An analysis of scale invariance in object detection - SNIP. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 3578\u20133587. https:\/\/doi.org\/10.1109\/CVPR.2018.00377"},{"key":"17949_CR64","doi-asserted-by":"publisher","first-page":"6053","DOI":"10.1109\/ICCV.2019.00615","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"Y Li","year":"2019","unstructured":"Li Y, Chen Y, Wang N, Zhang Z (2019) Scale-aware trident networks for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 6053\u20136062. https:\/\/doi.org\/10.1109\/ICCV.2019.00615"},{"key":"17949_CR65","doi-asserted-by":"publisher","first-page":"10208","DOI":"10.1109\/CVPR46437.2021.01008","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"S Qiao","year":"2021","unstructured":"Qiao S, Chen L-C, Yuille A (2021) DetectoRS: detecting objects with recursive feature pyramid and switchable atrous convolution. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 10208\u201310219. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01008"},{"key":"17949_CR66","doi-asserted-by":"publisher","first-page":"779","DOI":"10.1109\/CVPR.2016.91","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"J Redmon","year":"2016","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 779\u2013788. https:\/\/doi.org\/10.1109\/CVPR.2016.91"},{"key":"17949_CR67","doi-asserted-by":"publisher","first-page":"21","DOI":"10.1007\/978-3-319-46448-0_2","volume":"9905","author":"W Liu","year":"2016","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y (2016) Berg AC (2016) SSD: single shot multibox detector. Computer Vision - ECCV 9905:21\u201337. https:\/\/doi.org\/10.1007\/978-3-319-46448-0_2","journal-title":"Computer Vision - ECCV"},{"key":"17949_CR68","doi-asserted-by":"publisher","first-page":"6517","DOI":"10.1109\/CVPR.2017.690","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"J Redmon","year":"2017","unstructured":"Redmon J, Farhadi A (2017) YOLO9000: Better, faster, stronger. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6517\u20136525. https:\/\/doi.org\/10.1109\/CVPR.2017.690"},{"key":"17949_CR69","doi-asserted-by":"publisher","unstructured":"Fu C-Y, Liu W, Ranga A, Tyagi A, Berg AC (2017) DSSD: Deconvolutional single shot detector. ArXiv. https:\/\/doi.org\/10.48550\/arXiv.1701.06659","DOI":"10.48550\/arXiv.1701.06659"},{"key":"17949_CR70","doi-asserted-by":"publisher","first-page":"770","DOI":"10.1109\/CVPR.2016.90","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"K He","year":"2016","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 770\u2013778. https:\/\/doi.org\/10.1109\/CVPR.2016.90"},{"key":"17949_CR71","doi-asserted-by":"publisher","unstructured":"Jeong J, Park H, Kwak N (2017) Enhancement of SSD by concatenating feature maps for object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1705.09587","DOI":"10.48550\/arXiv.1705.09587"},{"key":"17949_CR72","doi-asserted-by":"publisher","unstructured":"Lin T-Y, Goyal P, Girshick R, He K, Dollar P (2017) Focal loss for dense object detection. In: proceedings of the IEEE international conference on computer vision (ICCV), pp 2999-3007. https:\/\/doi.org\/10.1109\/ICCV.2017.324","DOI":"10.1109\/ICCV.2017.324"},{"key":"17949_CR73","doi-asserted-by":"publisher","unstructured":"Li Z, Zhou F (2018) FSSD: feature fusion single shot multibox detector. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1712.00960","DOI":"10.48550\/arXiv.1712.00960"},{"key":"17949_CR74","doi-asserted-by":"publisher","unstructured":"Redmon J, Farhadi A (2018) YOLOv3: An incremental improvement. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1804.02767","DOI":"10.48550\/arXiv.1804.02767"},{"issue":"3","key":"17949_CR75","doi-asserted-by":"publisher","first-page":"361","DOI":"10.1093\/aob\/mcg091","volume":"91","author":"XY Yin","year":"2003","unstructured":"Yin XY, Goudriaan J, Lantinga EA, Vos J, Spiertz HJ (2003) A flexible sigmoid function of determinate growth. Ann Bot 91(3):361\u2013371. https:\/\/doi.org\/10.1093\/aob\/mcg091","journal-title":"Ann Bot"},{"key":"17949_CR76","doi-asserted-by":"publisher","unstructured":"Bochkovskiy A, Wang C-Y, Mark Liao H-Y (2020) YOLOv4: optimal speed and accuracy of object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2004.10934","DOI":"10.48550\/arXiv.2004.10934"},{"key":"17949_CR77","doi-asserted-by":"publisher","first-page":"6022","DOI":"10.1109\/ICCV.2019.00612","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"S Yun","year":"2019","unstructured":"Yun S, Han D, Oh SJ, Chun S, Choe J, Yoo Y (2019) CutMix: regularization strategy to train strong classifiers with localizable features. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 6022\u20136031. https:\/\/doi.org\/10.1109\/ICCV.2019.00612"},{"key":"17949_CR78","unstructured":"Ghiasi G, Lin T-Y, Le QV (2018) DropBlock: a regularization method for convolutional networks. Adv Neural Inf Proces Syst:10750\u201310760. https:\/\/dl.acm.org\/doi\/10.5555\/3327546.3327732"},{"key":"17949_CR79","doi-asserted-by":"publisher","unstructured":"Zheng Z, Wang P, Liu W, Li J, Ye R, Ren D (2020) Distance-IoU loss: faster and better learning for bounding box regression. ArXiv. https:\/\/doi.org\/10.48550\/arXiv.1911.08287","DOI":"10.48550\/arXiv.1911.08287"},{"key":"17949_CR80","doi-asserted-by":"publisher","unstructured":"Misra D (2019) Mish: a self regularized non-monotonic neural activation function. ArXiv. https:\/\/doi.org\/10.48550\/arXiv.1908.08681","DOI":"10.48550\/arXiv.1908.08681"},{"issue":"9","key":"17949_CR81","doi-asserted-by":"publisher","first-page":"1904","DOI":"10.1109\/TPAMI.2015.2389824","volume":"37","author":"K He","year":"2014","unstructured":"He K, Zhang X, Ren S, Sun J (2014) Spatial pyramid pooling in deep convolutional networks for visual recognition. IEEE Trans Pattern Anal Mach Intell 37(9):1904\u20131916. https:\/\/doi.org\/10.1109\/TPAMI.2015.2389824","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17949_CR82","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/978-3-030-01234-2_1","volume":"11211","author":"S Woo","year":"2018","unstructured":"Woo S, Park J, Lee J-Y (2018) Kweon IS (2018) CBAM: convolutional block attention module. Computer Vision - ECCV 11211:3\u201319. https:\/\/doi.org\/10.1007\/978-3-030-01234-2_1","journal-title":"Computer Vision - ECCV"},{"key":"17949_CR83","doi-asserted-by":"publisher","first-page":"8759","DOI":"10.1109\/CVPR.2018.00913","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"S Liu","year":"2018","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 8759\u20138768. https:\/\/doi.org\/10.1109\/CVPR.2018.00913"},{"key":"17949_CR84","unstructured":"Jocher G, Chaurasia A, Stoken A, Borovec J, NanoCode, Kwon Y, Michael K, TaoXie, Fang J, Imyhxy, Lorna, Zeng Y, Wong C, V Abhiram, Montes D, Wang Z, Fati C, Nadar J, Laughing, UnglvKitDe, Sonck V, Tkianai, YxNong, Skalski P, Hogan A, Nair D, Strobel M, Jain M (2022) Ultralytics\/yolov5: v7.0 - YOLOv5 SOTA Realtime instance segmentation. Zenodo. https:\/\/ui.adsabs.harvard.edu\/abs\/2022zndo...3908559J"},{"key":"17949_CR85","doi-asserted-by":"publisher","unstructured":"Long X, Deng K, Wang G, Zhang Y, Dang Q, Gao Y, Shen H, Ren J, Han S, Ding E, Wen S (2020) PP-YOLO: an effective and efficient implementation of object detector. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2007.12099","DOI":"10.48550\/arXiv.2007.12099"},{"key":"17949_CR86","doi-asserted-by":"publisher","unstructured":"Wang X, Zhang R, Kong T, Li L, Shen C (2020) SOLOv2: dynamic and fast instance segmentation. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2003.10152","DOI":"10.48550\/arXiv.2003.10152"},{"key":"17949_CR87","doi-asserted-by":"publisher","unstructured":"Cai Y, Li H, Yuan G, Niu W, Li Y, Tang X, Ren B, Wang Y (2021) YOLObile: Real-time object detection on mobile devices via compression-compilation co-design. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2009.05697","DOI":"10.48550\/arXiv.2009.05697"},{"key":"17949_CR88","doi-asserted-by":"publisher","first-page":"7029","DOI":"10.1109\/CVPR.2019.00720","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"G Ghiasi","year":"2019","unstructured":"Ghiasi G, Lin T-Y, Le QV (2019) NAS-FPN: learning scalable feature pyramid architecture for object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 7029\u20137038. https:\/\/doi.org\/10.1109\/CVPR.2019.00720"},{"key":"17949_CR89","doi-asserted-by":"publisher","first-page":"11589","DOI":"10.1109\/CVPR42600.2020.01161","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"X Du","year":"2020","unstructured":"Du X, Lin T-Y, Jin P, Ghiasi G, Tan M, Cui YV, Le Q, Song X (2020) SpineNet: learning scale-permuted backbone for recognition and localization. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 11589\u201311598. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01161"},{"key":"17949_CR90","doi-asserted-by":"publisher","first-page":"10425","DOI":"10.1109\/CVPR42600.2020.01044","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"I Radosavovic","year":"2020","unstructured":"Radosavovic I, Kosaraju RP, Girshick R, He K, Doll\u00e1r P (2020) Designing network design spaces. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 10425\u201310433. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01044"},{"key":"17949_CR91","doi-asserted-by":"publisher","first-page":"13024","DOI":"10.1109\/CVPR46437.2021.01283","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"C-Y Wang","year":"2021","unstructured":"Wang C-Y, Bochkovskiy A, Liao H-YM (2021) Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 13024\u201313033. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01283"},{"key":"17949_CR92","doi-asserted-by":"publisher","first-page":"1571","DOI":"10.1109\/CVPRW50498.2020.00203","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops (CVPRW)","author":"C-Y Wang","year":"2020","unstructured":"Wang C-Y, Liao H-YM WY-H, Chen P-Y, Hsieh J-W, Yeh IH (2020) CSPNet: a new backbone that can enhance learning capability of CNN. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops (CVPRW), pp 1571\u20131580. https:\/\/doi.org\/10.1109\/CVPRW50498.2020.00203"},{"key":"17949_CR93","doi-asserted-by":"publisher","first-page":"13034","DOI":"10.1109\/CVPR46437.2021.01284","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"Q Chen","year":"2021","unstructured":"Chen Q, Wang Y, Yang T, Zhang X, Cheng J, Sun J (2021) You only look one-level feature. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 13034\u201313043. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01284"},{"key":"17949_CR94","doi-asserted-by":"publisher","unstructured":"Huang X, Wang X, Lv W, Bai X, Long X, Deng K, Dang Q, Han S, Liu Q, Hu X, Yu D, Ma Y, Yoshie O (2021) PP-YOLOv2: A practical object detector. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2104.10419","DOI":"10.48550\/arXiv.2104.10419"},{"key":"17949_CR95","doi-asserted-by":"publisher","unstructured":"Ge Z, Liu S, Wang F, Li Z, Sun J (2021) YOLOX: exceeding YOLO series in 2021. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2107.08430","DOI":"10.48550\/arXiv.2107.08430"},{"key":"17949_CR96","first-page":"303","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"Z Ge","year":"2021","unstructured":"Ge Z, Liu S, Liu Z, Yoshie O, Sun J (2021) OTA: optimal transport assignment for object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 303\u2013312"},{"key":"17949_CR97","doi-asserted-by":"publisher","unstructured":"Xu S, Wang X, Lv W, Chang Q, Cui C, Deng K, Wang G, Dang Q, Wei S, Du Y, Lai B (2022) PP-YOLOE: an evolved version of YOLO. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2203.16250","DOI":"10.48550\/arXiv.2203.16250"},{"key":"17949_CR98","doi-asserted-by":"publisher","first-page":"752","DOI":"10.1109\/CVPRW.2019.00103","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops (CVPRW)","author":"Y Lee","year":"2019","unstructured":"Lee Y, Hwang J-w, Lee S, Bae Y, Park J (2019) An energy and GPU-computation efficient backbone network for real-time object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition workshops (CVPRW), pp 752\u2013760. https:\/\/doi.org\/10.1109\/CVPRW.2019.00103"},{"key":"17949_CR99","doi-asserted-by":"publisher","unstructured":"Rao L (2021) TreeNet: a lightweight one-shot aggregation convolutional network. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2109.12342","DOI":"10.48550\/arXiv.2109.12342"},{"key":"17949_CR100","doi-asserted-by":"publisher","unstructured":"Li C, Li L, Jiang H, Weng K, Geng Y, Li L, Ke Z, Li Q, Cheng M, Nie W, Li Y, Zhang B, Liang Y, Zhou L, Xu X, Chu X, Wei X, Wei X (2022) YOLOv6: a single-stage object detection framework for industrial applications. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2209.02976","DOI":"10.48550\/arXiv.2209.02976"},{"key":"17949_CR101","doi-asserted-by":"publisher","first-page":"3490","DOI":"10.1109\/ICCV48922.2021.00349","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"C Feng","year":"2021","unstructured":"Feng C, Zhong Y, Gao Y, Scott MR, Huang W (2021) TOOD: task-aligned one-stage object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 3490\u20133499. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00349"},{"key":"17949_CR102","doi-asserted-by":"publisher","first-page":"8510","DOI":"10.1109\/CVPR46437.2021.00841","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"H Zhang","year":"2021","unstructured":"Zhang H, Wang Y, Dayoub F, Sunderhauf N (2021) VarifocalNet: an IoU-aware dense object detector. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 8510\u20138519. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00841"},{"key":"17949_CR103","doi-asserted-by":"publisher","unstructured":"Gevorgyan Z (2022) SIoU loss: more powerful learning for bounding box regression. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2205.12740","DOI":"10.48550\/arXiv.2205.12740"},{"key":"17949_CR104","doi-asserted-by":"publisher","first-page":"658","DOI":"10.1109\/CVPR.2019.00075","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"H Rezatofighi","year":"2019","unstructured":"Rezatofighi H, Tsoi N, Gwak J, Sadeghian A, Reid I, Savarese S (2019) Generalized intersection over union: a metric and a loss for bounding box regression. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 658\u2013666. https:\/\/doi.org\/10.1109\/CVPR.2019.00075"},{"key":"17949_CR105","doi-asserted-by":"publisher","first-page":"7464","DOI":"10.1109\/CVPR52729.2023.00721","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"C-Y Wang","year":"2022","unstructured":"Wang C-Y, Bochkovskiy A, Mark Liao H-Y (2022) YOLOv7: trainable bag-of-freebies sets new state-of-the-art for real-time object detectors. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 7464\u20137475. https:\/\/doi.org\/10.1109\/CVPR52729.2023.00721"},{"issue":"3","key":"17949_CR106","doi-asserted-by":"publisher","first-page":"975","DOI":"10.6688\/JISE.202307","volume":"39","author":"C-Y Wang","year":"2023","unstructured":"Wang C-Y, Liao H-YM, Yeh IH (2023) Designing network design strategies through gradient path analysis. J Inf Sci Eng 39(3):975\u2013995. https:\/\/doi.org\/10.6688\/JISE.202307","journal-title":"J Inf Sci Eng"},{"key":"17949_CR107","doi-asserted-by":"publisher","unstructured":"Wu Z, Zou X, Zhou W, Huang J (2023) YOLOX-PAI: an improved YOLOX, stronger and faster than YOLOv6. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2208.13040","DOI":"10.48550\/arXiv.2208.13040"},{"key":"17949_CR108","doi-asserted-by":"publisher","unstructured":"Liu S, Huang D, Wang Y (2019) Learning spatial fusion for single-shot object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1911.09516","DOI":"10.48550\/arXiv.1911.09516"},{"key":"17949_CR109","unstructured":"Jocher, G, Chaurasia, A, Qiu, J (2023). YOLO by Ultralytics (Version 8.0.0) [Computer software]. https:\/\/github.com\/ultralytics\/ultralytics"},{"key":"17949_CR110","doi-asserted-by":"publisher","unstructured":"Xu X, Jiang Y, Chen W, Huang Y, Zhang Y, Sun X (2022) DAMO-YOLO: a report on real-time object detection design. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2211.15444","DOI":"10.48550\/arXiv.2211.15444"},{"key":"17949_CR111","doi-asserted-by":"publisher","unstructured":"Sun Z, Lin M, Sun X, Tan Z, Li H, Jin R (2022) MAE-DET: revisiting maximum entropy principle in zero-shot NAS for efficient object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2111.13336","DOI":"10.48550\/arXiv.2111.13336"},{"key":"17949_CR112","doi-asserted-by":"publisher","unstructured":"Jiang Y, Tan Z, Wang J, Sun X, Lin M, Li H (2022) GiraffeDet: a heavy-neck paradigm for object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2202.04256","DOI":"10.48550\/arXiv.2202.04256"},{"key":"17949_CR113","doi-asserted-by":"publisher","unstructured":"Huang L, Yang Y, Deng Y, Yu Y (2015) DenseBox: unifying landmark localization with end to end object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1509.04874","DOI":"10.48550\/arXiv.1509.04874"},{"key":"17949_CR114","doi-asserted-by":"publisher","first-page":"2960","DOI":"10.1109\/CVPR.2019.00308","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"J Wang","year":"2019","unstructured":"Wang J, Chen K, Yang S, Loy CC, Lin D (2019) Region proposal by guided anchoring. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 2960\u20132969. https:\/\/doi.org\/10.1109\/CVPR.2019.00308"},{"key":"17949_CR115","doi-asserted-by":"publisher","first-page":"840","DOI":"10.1109\/CVPR.2019.00093","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"C Zhu","year":"2019","unstructured":"Zhu C, He Y, Savvides M (2019) Feature selective anchor-free module for single-shot object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 840\u2013849. https:\/\/doi.org\/10.1109\/CVPR.2019.00093"},{"key":"17949_CR116","doi-asserted-by":"publisher","first-page":"9626","DOI":"10.1109\/ICCV.2019.00972","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"Z Tian","year":"2019","unstructured":"Tian Z, Shen C, Chen H, He T (2019) FCOS: fully convolutional one-stage object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 9626\u20139635. https:\/\/doi.org\/10.1109\/ICCV.2019.00972"},{"issue":"12","key":"17949_CR117","first-page":"1698","volume":"27","author":"M Iwasaki","year":"1986","unstructured":"Iwasaki M, Inomata H (1986) Relation between superficial capillaries and foveal structures in the human retina. Invest Ophthalmol Vis Sci 27(12):1698\u20131705","journal-title":"Invest Ophthalmol Vis Sci"},{"key":"17949_CR118","doi-asserted-by":"publisher","first-page":"7389","DOI":"10.1109\/TIP.2020.3002345","volume":"29","author":"T Kong","year":"2020","unstructured":"Kong T, Sun F, Liu H, Jiang Y, Li L, Shi J (2020) FoveaBox: Beyound anchor-based object detection. IEEE Trans Image Process 29:7389\u20137398. https:\/\/doi.org\/10.1109\/TIP.2020.3002345","journal-title":"IEEE Trans Image Process"},{"key":"17949_CR119","doi-asserted-by":"publisher","first-page":"91","DOI":"10.1007\/978-3-030-58545-7_6","volume":"12354","author":"C Zhu","year":"2020","unstructured":"Zhu C, Chen F, Shen Z (2020) Savvides M (2020) soft anchor-point object detection. Computer Vision - ECCV 12354:91\u2013107. https:\/\/doi.org\/10.1007\/978-3-030-58545-7_6","journal-title":"Computer Vision - ECCV"},{"key":"17949_CR120","doi-asserted-by":"publisher","first-page":"9756","DOI":"10.1109\/CVPR42600.2020.00978","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"S Zhang","year":"2020","unstructured":"Zhang S, Chi C, Yao Y, Lei Z, Li S (2020) Bridging the gap between anchor-based and anchor-free detection via adaptive training sample selection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 9756\u20139765. https:\/\/doi.org\/10.1109\/CVPR42600.2020.00978"},{"key":"17949_CR121","doi-asserted-by":"publisher","first-page":"412","DOI":"10.1007\/978-3-030-58542-6_25","volume":"12367","author":"R Chen","year":"2020","unstructured":"Chen R, Liu Y, Zhang M, Liu S, Yu B (2020) Tai Y-W (2020) dive deeper into box for object detection. Computer Vision - ECCV 12367:412\u2013428. https:\/\/doi.org\/10.1007\/978-3-030-58542-6_25","journal-title":"Computer Vision - ECCV"},{"issue":"4","key":"17949_CR122","doi-asserted-by":"publisher","first-page":"1922","DOI":"10.1109\/TPAMI.2020.3032166","volume":"44","author":"Z Tian","year":"2022","unstructured":"Tian Z, Shen C, Chen H, He T (2022) FCOS: a simple and strong anchor-free object detector. IEEE Trans Pattern Anal Mach Intell 44(4):1922\u20131933. https:\/\/doi.org\/10.1109\/TPAMI.2020.3032166","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17949_CR123","doi-asserted-by":"publisher","unstructured":"Sun P, Jiang Y, Xie E, Shao W, Yuan Z, Wang C, Luo P (2020) What makes for end-to-end object detection? ArXiv. https:\/\/doi.org\/10.48550\/arXiv.2012.05780","DOI":"10.48550\/arXiv.2012.05780"},{"key":"17949_CR124","doi-asserted-by":"publisher","first-page":"14449","DOI":"10.1109\/CVPR46437.2021.01422","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"P Sun","year":"2021","unstructured":"Sun P, Zhang R, Jiang Y, Kong T, Xu C, Zhan W, Tomizuka M, Li L, Yuan Z, Wang C, Luo P (2021) Sparse R-CNN: end-to-end object detection with learnable proposals. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 14449\u201314458. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01422"},{"issue":"3","key":"17949_CR125","doi-asserted-by":"publisher","first-page":"642","DOI":"10.1007\/s11263-019-01204-1","volume":"128","author":"H Law","year":"2020","unstructured":"Law H, Deng J (2020) CornerNet: detecting objects as paired keypoints. Int J Comput Vis 128(3):642\u2013656. https:\/\/doi.org\/10.1007\/s11263-019-01204-1","journal-title":"Int J Comput Vis"},{"key":"17949_CR126","doi-asserted-by":"publisher","first-page":"16100","DOI":"10.1109\/CVPR46437.2021.01584","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"T Xu","year":"2021","unstructured":"Xu T, Takano W (2021) Graph stacked hourglass networks for 3D human pose estimation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 16100\u201316109. https:\/\/doi.org\/10.1109\/CVPR46437.2021.01584"},{"key":"17949_CR127","doi-asserted-by":"publisher","first-page":"6568","DOI":"10.1109\/ICCV.2019.00667","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"K Duan","year":"2019","unstructured":"Duan K, Bai S, Xie L, Qi H, Huang Q, Tian Q (2019) CenterNet: Keypoint triplets for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 6568\u20136577. https:\/\/doi.org\/10.1109\/ICCV.2019.00667"},{"key":"17949_CR128","doi-asserted-by":"publisher","first-page":"9656","DOI":"10.1109\/ICCV.2019.00975","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"Z Yang","year":"2019","unstructured":"Yang Z, Liu S, Hu H, Wang L, Lin S (2019) RepPoints: point set representation for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 9656\u20139665. https:\/\/doi.org\/10.1109\/ICCV.2019.00975"},{"key":"17949_CR129","doi-asserted-by":"publisher","first-page":"850","DOI":"10.1109\/CVPR.2019.00094","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"X Zhou","year":"2019","unstructured":"Zhou X, Zhuo J, Krahenbuhl P (2019) Bottom-up object detection by grouping extreme and center points. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 850\u2013859. https:\/\/doi.org\/10.1109\/CVPR.2019.00094"},{"key":"17949_CR130","doi-asserted-by":"publisher","first-page":"4940","DOI":"10.1109\/ICCV.2017.528","volume-title":"Proceedings of the IEEE international conference on computer vision (ICCV)","author":"DP Papadopoulos","year":"2017","unstructured":"Papadopoulos DP, Uijlings JRR, Keller F, Ferrari V (2017) Extreme clicking for efficient object annotation. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 4940\u20134949. https:\/\/doi.org\/10.1109\/ICCV.2017.528"},{"key":"17949_CR131","doi-asserted-by":"publisher","unstructured":"Dong Z, Li G, Liao Y, Wang F, Ren P, Qian C (2020) CentripetalNet: pursuing high-quality keypoint pairs for object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2003.09119","DOI":"10.48550\/arXiv.2003.09119"},{"key":"17949_CR132","doi-asserted-by":"publisher","first-page":"10394","DOI":"10.1109\/CVPR42600.2020.01041","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"S Lan","year":"2020","unstructured":"Lan S, Ren Z, Wu YS, Davis L, Hua G (2020) SaccadeNet: a fast and accurate object detector. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 10394\u201310403. https:\/\/doi.org\/10.1109\/CVPR42600.2020.01041"},{"key":"17949_CR133","doi-asserted-by":"publisher","first-page":"399","DOI":"10.1007\/978-3-030-58580-8_24","volume":"12348","author":"K Duan","year":"2020","unstructured":"Duan K, Xie L, Qi H, Bai S, Huang Q, Tian Q (2020) Corner proposal network for anchor-free, two-stage object detection. Computer Vision - ECCV 12348:399\u2013416. https:\/\/doi.org\/10.1007\/978-3-030-58580-8_24","journal-title":"Computer Vision - ECCV"},{"key":"17949_CR134","doi-asserted-by":"publisher","unstructured":"Zhou X, Koltun V, Krahenbuhl P (2021) Probabilistic two-stage detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2103.07461","DOI":"10.48550\/arXiv.2103.07461"},{"key":"17949_CR135","doi-asserted-by":"publisher","unstructured":"Liu W, Hasan I, Liao S (2023) Center and scale prediction: anchor-free approach for pedestrian and face detection. Pattern Recogn 135. https:\/\/doi.org\/10.1016\/j.patcog.2022.109071","DOI":"10.1016\/j.patcog.2022.109071"},{"key":"17949_CR136","doi-asserted-by":"publisher","first-page":"4457","DOI":"10.1109\/CVPR.2017.474","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"S Zhang","year":"2017","unstructured":"Zhang S, Benenson R, Schiele B (2017) CityPersons: a diverse dataset for pedestrian detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4457\u20134465. https:\/\/doi.org\/10.1109\/CVPR.2017.474"},{"key":"17949_CR137","doi-asserted-by":"publisher","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN, Kaiser L, Polosukhin I (2017) Attention is all you need. ArXiv. https:\/\/doi.org\/10.48550\/arXiv.1706.03762","DOI":"10.48550\/arXiv.1706.03762"},{"key":"17949_CR138","doi-asserted-by":"publisher","first-page":"213","DOI":"10.1007\/978-3-030-58452-8_13","volume":"12346","author":"N Carion","year":"2020","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A (2020) Zagoruyko S (2020) end-to-end object detection with transformers. Computer Vision - ECCV 12346:213\u2013229. https:\/\/doi.org\/10.1007\/978-3-030-58452-8_13","journal-title":"Computer Vision - ECCV"},{"issue":"11","key":"17949_CR139","doi-asserted-by":"publisher","first-page":"3212","DOI":"10.1109\/TNNLS.2018.2876865","volume":"30","author":"Z-Q Zhao","year":"2019","unstructured":"Zhao Z-Q, Zheng P, Xu S-t, Wu X (2019) Object detection with deep learning: a review. IEEE Transactions on Neural Networks and Learning Systems 30(11):3212\u20133232. https:\/\/doi.org\/10.1109\/TNNLS.2018.2876865","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"17949_CR140","doi-asserted-by":"publisher","unstructured":"Zhu X, Su W, Lu L, Li B, Wang X, Dai J (2021) Deformable DETR: deformable transformers for end-to-end object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2010.04159","DOI":"10.48550\/arXiv.2010.04159"},{"key":"17949_CR141","doi-asserted-by":"publisher","unstructured":"Devlin J, Chang M-W, Lee K, Toutanova K (2019) BERT: pre-training of deep bidirectional transformers for language understanding. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1810.04805","DOI":"10.48550\/arXiv.1810.04805"},{"key":"17949_CR142","unstructured":"Radford A, Wu J, Child R, Luan D, Amodei D, Sutskever I (2019) Language models are unsupervised multitask learners. OpenAI blog Accessed 2 Dec 2022. https:\/\/cdn.openai.com\/better-languagemodels\/language_models_are_unsupervised_multitask_learners.pdf"},{"key":"17949_CR143","doi-asserted-by":"publisher","first-page":"1601","DOI":"10.1109\/CVPR46437.2021.00165","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"Z Dai","year":"2021","unstructured":"Dai Z, Cai B, Lin Y, Chen J (2021) UP-DETR: unsupervised pre-training for object detection with transformers. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 1601\u20131610. https:\/\/doi.org\/10.1109\/CVPR46437.2021.00165"},{"key":"17949_CR144","doi-asserted-by":"publisher","unstructured":"Zheng M, Gao P, Zhang R, Li K, Wang X, Li H, Dong H (2021) End-to-end object detection with adaptive clustering transformer. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2011.09315","DOI":"10.48550\/arXiv.2011.09315"},{"key":"17949_CR145","doi-asserted-by":"publisher","unstructured":"Gregor K, Danihelka I, Graves A, Rezende DJ, Wierstra D (2015) DRAW: a recurrent neural network for image generation. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1502.04623","DOI":"10.48550\/arXiv.1502.04623"},{"key":"17949_CR146","doi-asserted-by":"publisher","unstructured":"Guo M, Zhang Y, Liu T, Aaai (2019) Gaussian transformer: a lightweight approach for natural language inference. Proceedings of the AAAI Conference on Artificial Intelligence 33(01):6489\u20136496. https:\/\/doi.org\/10.1609\/aaai.v33i01.33016489","DOI":"10.1609\/aaai.v33i01.33016489"},{"key":"17949_CR147","doi-asserted-by":"publisher","first-page":"3601","DOI":"10.1109\/ICCV48922.2021.00360","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"P Gao","year":"2021","unstructured":"Gao P, Zheng M, Wang X, Dai J, Li H (2021) Fast convergence of DETR with spatially modulated co-attention. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 3601\u20133610. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00360"},{"key":"17949_CR148","doi-asserted-by":"publisher","unstructured":"Yao Z, Ai J, Li B, Zhang C (2021) Efficient DETR: improving end-to-end object detector with dense prior. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2104.01318","DOI":"10.48550\/arXiv.2104.01318"},{"key":"17949_CR149","doi-asserted-by":"publisher","first-page":"3591","DOI":"10.1109\/ICCV48922.2021.00359","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"Z Sun","year":"2021","unstructured":"Sun Z, Cao S, Yang Y, Kitani K (2021) Rethinking transformer-based set prediction for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 3591\u20133600. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00359"},{"issue":"1","key":"17949_CR150","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. J Mach Learn Res 15(1):1929\u20131958 https:\/\/dl.acm.org\/doi\/abs\/10.5555\/2627435.2670313","journal-title":"J Mach Learn Res"},{"key":"17949_CR151","doi-asserted-by":"publisher","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1502.03167","DOI":"10.48550\/arXiv.1502.03167"},{"key":"17949_CR152","doi-asserted-by":"publisher","unstructured":"Fang Y, Liao B, Wang X, Fang J, Qi J, Wu R, Niu J, Liu W (2021) You only look at one sequence: rethinking transformer in vision through object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2106.00666","DOI":"10.48550\/arXiv.2106.00666"},{"key":"17949_CR153","doi-asserted-by":"publisher","first-page":"3631","DOI":"10.1109\/ICCV48922.2021.00363","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"D Meng","year":"2021","unstructured":"Meng D, Chen X, Fan Z, Zeng G, Li H, Yuan Y, Sun L, Wang J (2021) Conditional DETR for fast training convergence. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 3631\u20133640. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00363"},{"key":"17949_CR154","doi-asserted-by":"publisher","unstructured":"Wang Y, Zhang X, Yang T, Sun J (2021) Anchor DETR: query design for transformer-based object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2109.07107","DOI":"10.48550\/arXiv.2109.07107"},{"key":"17949_CR155","doi-asserted-by":"publisher","first-page":"4641","DOI":"10.1109\/ICCV48922.2021.00462","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"T Wang","year":"2021","unstructured":"Wang T, Yuan L, Chen Y, Feng J, Yan S (2021) PnP-DETR: towards efficient visual analysis with transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 4641\u20134650. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00462"},{"key":"17949_CR156","doi-asserted-by":"publisher","unstructured":"Chen T, Saxena S, Li L, Fleet DJ, Hinton GE (2021) Pix2seq: a language modeling framework for object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2109.10852","DOI":"10.48550\/arXiv.2109.10852"},{"key":"17949_CR157","doi-asserted-by":"publisher","unstructured":"Roh B, Shin J, Shin W, Kim S (2022) Sparse DETR: efficient end-to-end object detection with learnable sparsity. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2111.14330","DOI":"10.48550\/arXiv.2111.14330"},{"key":"17949_CR158","doi-asserted-by":"publisher","unstructured":"Liu S, Li F, Zhang H, Yang XB, Qi X, Su H, Zhu J, Zhang L (2022) DAB-DETR: dynamic anchor boxes are better queries for DETR. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2201.12329","DOI":"10.48550\/arXiv.2201.12329"},{"key":"17949_CR159","unstructured":"Wang W, Cao Y, Zhang J, Tao D (2022) FP-DETR: detection transformer advanced by fully pre-training. In: International conference on learning representations. https:\/\/openreview.net\/forum?id=yjMQuLLcGWK"},{"key":"17949_CR160","doi-asserted-by":"publisher","first-page":"2968","DOI":"10.1109\/ICCV48922.2021.00298","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"X Dai","year":"2021","unstructured":"Dai X, Chen Y, Yang J, Zhang P, Yuan L, Zhang L, IEEE (2021) Dynamic DETR: end-to-end object detection with dynamic attention. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 2968\u20132977. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00298"},{"key":"17949_CR161","doi-asserted-by":"publisher","first-page":"13609","DOI":"10.1109\/CVPR52688.2022.01325","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"F Li","year":"2022","unstructured":"Li F, Zhang H, Liu S, Guo J, Ni LM, Zhang L (2022) DN-DETR: accelerate DETR training by introducing query denoising. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 13609\u201313617. https:\/\/doi.org\/10.1109\/CVPR52688.2022.01325"},{"key":"17949_CR162","doi-asserted-by":"publisher","unstructured":"Zhang H, Li F, Liu S, Zhang L, Su H, Zhu JM, Ni L, Shum H-Y (2022) DINO: DETR with improved denoising anchor boxes for end-to-end object detection. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2203.03605","DOI":"10.48550\/arXiv.2203.03605"},{"key":"17949_CR163","doi-asserted-by":"publisher","first-page":"323","DOI":"10.1007\/978-3-030-58604-1_20","volume":"12373","author":"D Zhang","year":"2020","unstructured":"Zhang D, Zhang H, Tang J, Wang M, Hua X, Sun Q (2020) Feature pyramid transformer. Computer Vision - ECCV 12373:323\u2013339. https:\/\/doi.org\/10.1007\/978-3-030-58604-1_20","journal-title":"Computer Vision - ECCV"},{"key":"17949_CR164","doi-asserted-by":"publisher","first-page":"548","DOI":"10.1109\/ICCV48922.2021.00061","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"W Wang","year":"2021","unstructured":"Wang W, Xie E, Li X, Fan D-P, Song K, Liang D, Lu T, Luo P, Shao L (2021) Pyramid vision transformer: a versatile backbone for dense prediction without convolutions. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 548\u2013558. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00061"},{"key":"17949_CR165","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1109\/IALP51396.2020.9310460","volume-title":"Proceedings of the international conference on Asian language processing (IALP)","author":"H Wang","year":"2020","unstructured":"Wang H, Tu M (2020) Enhancing attention models via multi-head collaboration. In: Proceedings of the international conference on Asian language processing (IALP), pp 19\u201323. https:\/\/doi.org\/10.1109\/IALP51396.2020.9310460"},{"key":"17949_CR166","doi-asserted-by":"publisher","first-page":"2978","DOI":"10.1109\/ICCV48922.2021.00299","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"P Zhang","year":"2021","unstructured":"Zhang P, Dai X, Yang J, Xiao B, Yuan L, Zhang L, Gao J (2021) Multi-scale vision longformer: a new vision transformer for high-resolution image encoding. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 2978\u20132988. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00299"},{"key":"17949_CR167","doi-asserted-by":"publisher","unstructured":"Beltagy IE, Peters M, Cohan A (2020) Longformer: the long-document transformer. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2004.05150","DOI":"10.48550\/arXiv.2004.05150"},{"key":"17949_CR168","doi-asserted-by":"publisher","unstructured":"Lu J, Batra D, Parikh D, Lee S (2019) ViLBERT: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1908.02265","DOI":"10.48550\/arXiv.1908.02265"},{"key":"17949_CR169","doi-asserted-by":"publisher","unstructured":"Yang J, Li C, Zhang P, Dai X, Xiao B, Yuan L, Gao J (2021) Focal self-attention for local-global interactions in vision transformers. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2107.00641","DOI":"10.48550\/arXiv.2107.00641"},{"key":"17949_CR170","doi-asserted-by":"publisher","first-page":"9992","DOI":"10.1109\/ICCV48922.2021.00986","volume-title":"Proceedings of the IEEE\/CVF international conference on computer vision (ICCV)","author":"Z Liu","year":"2021","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z, Lin S, Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp 9992\u201310002. https:\/\/doi.org\/10.1109\/ICCV48922.2021.00986"},{"key":"17949_CR171","doi-asserted-by":"publisher","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S, Uszkoreit J, Houlsby N (2021) An image is worth 16x16 words: transformers for image recognition at scale. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2010.11929","DOI":"10.48550\/arXiv.2010.11929"},{"key":"17949_CR172","doi-asserted-by":"publisher","unstructured":"Yuan Y, Fu R, Huang L, Lin W, Zhang C, Chen X, Wang J (2021) HRFormer: high-resolution transformer for dense prediction. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2110.09408","DOI":"10.48550\/arXiv.2110.09408"},{"issue":"10","key":"17949_CR173","doi-asserted-by":"publisher","first-page":"3349","DOI":"10.1109\/TPAMI.2020.2983686","volume":"43","author":"J Wang","year":"2021","unstructured":"Wang J, Sun K, Cheng T, Jiang B, Deng C, Zhao Y, Liu D, Mu Y, Tan M, Wang X, Liu W, Xiao B (2021) Deep high-resolution representation learning for visual recognition. IEEE Trans Pattern Anal Mach Intell 43(10):3349\u20133364. https:\/\/doi.org\/10.1109\/TPAMI.2020.2983686","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17949_CR174","doi-asserted-by":"publisher","unstructured":"Dai Z, Liu H, Le QV, Tan M (2021) CoAtNet: marrying convolution and attention for all data sizes. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2106.04803","DOI":"10.48550\/arXiv.2106.04803"},{"key":"17949_CR175","doi-asserted-by":"publisher","unstructured":"Xiao T, Singh M, Mintun E, Darrell T, Dollar P, Girshick R (2021) Early convolutions help transformers see better. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2106.14881","DOI":"10.48550\/arXiv.2106.14881"},{"key":"17949_CR176","doi-asserted-by":"publisher","first-page":"12084","DOI":"10.1109\/CVPR52688.2022.01178","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"J Gu","year":"2022","unstructured":"Gu J, Kwon H, Wang D, Ye W, Li M, Chen YH, Lai L, Chandra V, Pan DZ (2022) Multi-scale high-resolution vision transformer for semantic segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 12084\u201312093. https:\/\/doi.org\/10.1109\/CVPR52688.2022.01178"},{"issue":"3","key":"17949_CR177","doi-asserted-by":"publisher","first-page":"415","DOI":"10.1007\/s41095-022-0274-8","volume":"8","author":"W Wang","year":"2022","unstructured":"Wang W, Xie E, Li X, Fan D-P, Song K, Liang D, Lu T, Luo P, Shao L (2022) PVT v2: improved baselines with pyramid vision transformer. Computational Visual Media 8(3):415\u2013424. https:\/\/doi.org\/10.1007\/s41095-022-0274-8","journal-title":"Computational Visual Media"},{"key":"17949_CR178","doi-asserted-by":"publisher","unstructured":"Chu X, Tian Z, Zhang B, Wang X, Shen C (2021) Conditional positional encodings for vision transformers. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2102.10882","DOI":"10.48550\/arXiv.2102.10882"},{"key":"17949_CR179","doi-asserted-by":"publisher","unstructured":"Li Y, Zhang K, Cao J, Timofte R, Gool LV (2021) LocalViT: bringing locality to vision transformers. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.2104.05707","DOI":"10.48550\/arXiv.2104.05707"},{"key":"17949_CR180","doi-asserted-by":"publisher","unstructured":"Hendrycks D, Gimpel K (2016) Gaussian error linear units (GELUs). Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1606.08415","DOI":"10.48550\/arXiv.1606.08415"},{"key":"17949_CR181","doi-asserted-by":"publisher","first-page":"11999","DOI":"10.1109\/CVPR52688.2022.01170","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"Z Liu","year":"2022","unstructured":"Liu Z, Hu H, Lin Y, Yao Z, Xie Z, Wei Y, Ning J, Cao Y, Zhang Z, Dong L, Wei F, Guo B (2022) Swin transformer V2: scaling up capacity and resolution. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 11999\u201312009. https:\/\/doi.org\/10.1109\/CVPR52688.2022.01170"},{"key":"17949_CR182","doi-asserted-by":"publisher","first-page":"9643","DOI":"10.1109\/CVPR52688.2022.00943","volume-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR)","author":"Z Xie","year":"2022","unstructured":"Xie Z, Zhang Z, Cao Y, Lin Y, Bao J, Yao Z, Dai Q, Hu H (2022) SimMIM: a simple framework for masked image modeling. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp 9643\u20139653. https:\/\/doi.org\/10.1109\/CVPR52688.2022.00943"},{"issue":"8","key":"17949_CR183","doi-asserted-by":"publisher","first-page":"1845","DOI":"10.1109\/TPAMI.2017.2738644","volume":"40","author":"S Yang","year":"2018","unstructured":"Yang S, Luo P, Loy CC, Tang X (2018) Faceness-net: face detection through deep facial part responses. IEEE Trans Pattern Anal Mach Intell 40(8):1845\u20131859. https:\/\/doi.org\/10.1109\/TPAMI.2017.2738644","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17949_CR184","doi-asserted-by":"publisher","first-page":"180","DOI":"10.1016\/j.neucom.2019.10.087","volume":"380","author":"J Zhang","year":"2020","unstructured":"Zhang J, Wu X, Zhu J, Hoi SCH (2020) Feature agglomeration networks for single stage face detection. Neurocomputing 380:180\u2013189. https:\/\/doi.org\/10.1016\/j.neucom.2019.10.087","journal-title":"Neurocomputing"},{"key":"17949_CR185","doi-asserted-by":"publisher","first-page":"4885","DOI":"10.1109\/ICCV.2017.522","volume-title":"Proceedings of the IEEE international conference on computer vision (ICCV)","author":"M Najibi","year":"2017","unstructured":"Najibi M, Samangouei P, Chellappa R, Davis LS (2017) SSH: single stage headless face detector. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 4885\u20134894. https:\/\/doi.org\/10.1109\/ICCV.2017.522"},{"key":"17949_CR186","doi-asserted-by":"publisher","first-page":"192","DOI":"10.1109\/ICCV.2017.30","volume-title":"Proceedings of the IEEE international conference on computer vision (ICCV)","author":"S Zhang","year":"2017","unstructured":"Zhang S, Zhu X, Lei Z, Shi H, Wang X, Li SZ (2017) S FD: single shot scale-invariant face detector. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 192\u2013201. https:\/\/doi.org\/10.1109\/ICCV.2017.30"},{"key":"17949_CR187","doi-asserted-by":"publisher","first-page":"1609","DOI":"10.1109\/TMM.2021.3068840","volume":"24","author":"J Liang","year":"2022","unstructured":"Liang J, Wang J, Quan Y, Chen T, Liu J, Ling H, Xu Y (2022) Recurrent exposure generation for low-light face detection. IEEE Trans Multimedia 24:1609\u20131621. https:\/\/doi.org\/10.1109\/TMM.2021.3068840","journal-title":"IEEE Trans Multimedia"},{"issue":"10","key":"17949_CR188","doi-asserted-by":"publisher","first-page":"5962","DOI":"10.1109\/TPAMI.2021.3087709","volume":"44","author":"J Deng","year":"2019","unstructured":"Deng J, Guo J, Xue N, Zafeiriou S (2019) ArcFace: additive angular margin loss for deep face recognition. IEEE Trans Pattern Anal Mach Intell 44(10):5962\u20135979. https:\/\/doi.org\/10.1109\/TPAMI.2021.3087709","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"17949_CR189","doi-asserted-by":"publisher","first-page":"47","DOI":"10.1016\/j.imavis.2019.04.007","volume":"87","author":"C-C Hendry","year":"2019","unstructured":"Hendry C-C (2019) Automatic license plate recognition via sliding-window darknet-YOLO deep learning. Image Vis Comput 87:47\u201356. https:\/\/doi.org\/10.1016\/j.imavis.2019.04.007","journal-title":"Image Vis Comput"},{"key":"17949_CR190","doi-asserted-by":"publisher","first-page":"196","DOI":"10.1016\/j.optlastec.2018.08.007","volume":"110","author":"S Yang","year":"2019","unstructured":"Yang S, Zhang J, Bo C, Wang M, Chen L (2019) Fast vehicle logo detection in complex scenes. Opt Laser Technol 110:196\u2013201. https:\/\/doi.org\/10.1016\/j.optlastec.2018.08.007","journal-title":"Opt Laser Technol"},{"key":"17949_CR191","doi-asserted-by":"publisher","first-page":"851","DOI":"10.1109\/ITSC.2018.8569522","volume-title":"Proceedings of the 21st international conference on intelligent transportation systems (ITSC)","author":"M Bach","year":"2018","unstructured":"Bach M, Stumper D, Dietmayer K (2018) Deep convolutional traffic light recognition for automated driving. In: Proceedings of the 21st international conference on intelligent transportation systems (ITSC), pp 851\u2013858. https:\/\/doi.org\/10.1109\/ITSC.2018.8569522"},{"key":"17949_CR192","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/IJCNN.2018.8489623","volume-title":"Proceedings of the international joint conference on neural networks (IJCNN)","author":"D Li","year":"2018","unstructured":"Li D, Zhao D, Chen Y, Zhang Q (2018) DeepSign: deep learning based traffic sign recognition. In: Proceedings of the international joint conference on neural networks (IJCNN), pp 1\u20136. https:\/\/doi.org\/10.1109\/IJCNN.2018.8489623"},{"key":"17949_CR193","doi-asserted-by":"publisher","first-page":"114","DOI":"10.2991\/cimns-18.2018.26","volume-title":"Proceedings of the 3rd international conference on communications, information management and network security (CIMNS)","author":"Q Jinxing","year":"2018","unstructured":"Jinxing Q, Bo Q (2018) Fast license plate recognition method based on competitive neural network. In: Proceedings of the 3rd international conference on communications, information management and network security (CIMNS), pp 114\u2013117. https:\/\/doi.org\/10.2991\/cimns-18.2018.26"},{"key":"17949_CR194","doi-asserted-by":"publisher","first-page":"10304","DOI":"10.1109\/ACCESS.2021.3050484","volume":"9","author":"Z Zheng","year":"2021","unstructured":"Zheng Z, Zhao J, Li Y (2021) Research on detecting bearing-cover defects based on improved YOLOv3. IEEE Access 9:10304\u201310315. https:\/\/doi.org\/10.1109\/ACCESS.2021.3050484","journal-title":"IEEE Access"},{"key":"17949_CR195","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1109\/ICPECA56706.2023.10075704","volume-title":"Proceedings of the IEEE 3rd international conference on power, electronics and computer applications (ICPECA)","author":"Q Wu","year":"2023","unstructured":"Wu Q, Wang C, Han Y, Kang Q, Li J, Lu X (2023) Object detection of double-sided copper laminates based on YOLOv5. In: Proceedings of the IEEE 3rd international conference on power, electronics and computer applications (ICPECA), pp 171\u2013175. https:\/\/doi.org\/10.1109\/ICPECA56706.2023.10075704"},{"key":"17949_CR196","doi-asserted-by":"publisher","unstructured":"Guo C, Lv X-l, Zhang Y, Zhang M-l (2021) Improved YOLOv4-tiny network for real-time electronic component detection. Sci Rep 11(1). https:\/\/doi.org\/10.1038\/s41598-021-02225-y","DOI":"10.1038\/s41598-021-02225-y"},{"issue":"1","key":"17949_CR197","doi-asserted-by":"publisher","first-page":"61","DOI":"10.1049\/iet-cvi.2018.5002","volume":"13","author":"Y Chao","year":"2019","unstructured":"Chao Y, Chen X, Xiao N (2019) Deep learning-based grasp-detection method for a five-fingered industrial robot hand. IET Comput Vis 13(1):61\u201370. https:\/\/doi.org\/10.1049\/iet-cvi.2018.5002","journal-title":"IET Comput Vis"},{"key":"17949_CR198","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/IJCNN54540.2023.10191285","volume-title":"Proceedings of the international joint conference on neural networks (IJCNN)","author":"Z Zhang","year":"2023","unstructured":"Zhang Z, Zhou M, Wan H, Li M, Li G (2023) DCP-net: the defect detection method of industrial product based on dual collaborative paths. In: Proceedings of the international joint conference on neural networks (IJCNN), pp 1\u20138. https:\/\/doi.org\/10.1109\/IJCNN54540.2023.10191285"},{"issue":"5","key":"17949_CR199","doi-asserted-by":"publisher","first-page":"2486","DOI":"10.1109\/TGRS.2016.2645610","volume":"55","author":"Y Long","year":"2017","unstructured":"Long Y, Gong Y, Xiao Z, Liu Q (2017) Accurate object localization in remote sensing images based on convolutional neural networks. IEEE Trans Geosci Remote Sens 55(5):2486\u20132498. https:\/\/doi.org\/10.1109\/TGRS.2016.2645610","journal-title":"IEEE Trans Geosci Remote Sens"},{"issue":"12","key":"17949_CR200","doi-asserted-by":"publisher","first-page":"7405","DOI":"10.1109\/TGRS.2016.2601622","volume":"54","author":"G Cheng","year":"2016","unstructured":"Cheng G, Zhou P, Han J (2016) Learning rotation-invariant convolutional neural networks for object detection in VHR optical remote sensing images. IEEE Trans Geosci Remote Sens 54(12):7405\u20137415. https:\/\/doi.org\/10.1109\/TGRS.2016.2601622","journal-title":"IEEE Trans Geosci Remote Sens"},{"issue":"2","key":"17949_CR201","doi-asserted-by":"publisher","first-page":"310","DOI":"10.1109\/LGRS.2018.2872355","volume":"16","author":"C Wang","year":"2019","unstructured":"Wang C, Bai X, Wang S, Zhou J, Ren P (2019) Multiscale visual attention networks for object detection in VHR remote sensing images. IEEE Geosci Remote Sens Lett 16(2):310\u2013314. https:\/\/doi.org\/10.1109\/LGRS.2018.2872355","journal-title":"IEEE Geosci Remote Sens Lett"},{"key":"17949_CR202","doi-asserted-by":"publisher","first-page":"294","DOI":"10.1016\/j.isprsjprs.2020.01.025","volume":"161","author":"K Fu","year":"2020","unstructured":"Fu K, Chang Z, Zhang Y, Xu G, Zhang K, Sun X (2020) Rotation-aware and multi-scale convolutional neural network for object detection in remote sensing images. ISPRS J Photogramm Remote Sens 161:294\u2013308. https:\/\/doi.org\/10.1016\/j.isprsjprs.2020.01.025","journal-title":"ISPRS J Photogramm Remote Sens"},{"key":"17949_CR203","doi-asserted-by":"publisher","first-page":"167448","DOI":"10.1109\/ACCESS.2020.3021660","volume":"8","author":"M Sharif","year":"2020","unstructured":"Sharif M, Amin J, Siddiqa A, Khan HU, Arshad Malik MS, Anjum MA, Kadry S (2020) Recognition of different types of leukocytes using YOLOv2 and optimized bag-of-features. IEEE Access 8:167448\u2013167459. https:\/\/doi.org\/10.1109\/ACCESS.2020.3021660","journal-title":"IEEE Access"},{"key":"17949_CR204","doi-asserted-by":"publisher","unstructured":"LaLonde R, Bagci U (2018) Capsules for object segmentation. Arxiv. https:\/\/doi.org\/10.48550\/arXiv.1804.04241","DOI":"10.48550\/arXiv.1804.04241"},{"key":"17949_CR205","doi-asserted-by":"publisher","first-page":"178563","DOI":"10.1109\/ACCESS.2020.3026483","volume":"8","author":"H Bai","year":"2020","unstructured":"Bai H, Zhang T, Lu C, Chen W, Xu F, Han Z-B (2020) Chromosome extraction based on U-net and YOLOv3. IEEE Access 8:178563\u2013178569. https:\/\/doi.org\/10.1109\/ACCESS.2020.3026483","journal-title":"IEEE Access"},{"key":"17949_CR206","doi-asserted-by":"publisher","DOI":"10.1016\/j.compmedimag.2020.101732","volume":"82","author":"Z Zhuang","year":"2020","unstructured":"Zhuang Z, Liu G, Ding W, Raj ANJ, Qiu S, Guo J, Yuan Y (2020) Cardiac VFM visualization and analysis based on YOLO deep learning model and modified 2D continuity equation. Comput Med Imaging Graph 82:101732. https:\/\/doi.org\/10.1016\/j.compmedimag.2020.101732","journal-title":"Comput Med Imaging Graph"},{"key":"17949_CR207","doi-asserted-by":"publisher","unstructured":"Schubert PJ, Dorkenwald S, Januszewski M, Jain V, Kornfeld J (2019) Learning cellular morphology with neural networks. Nat Commun 10. https:\/\/doi.org\/10.1038\/s41467-019-10836-3","DOI":"10.1038\/s41467-019-10836-3"},{"key":"17949_CR208","doi-asserted-by":"publisher","unstructured":"Aly GH, Marey M, El-Sayed SA, Tolba MF (2021) YOLO based breast masses detection and classification in full-field digital mammograms. Comput Methods Prog Biomed 200. https:\/\/doi.org\/10.1016\/j.cmpb.2020.105823","DOI":"10.1016\/j.cmpb.2020.105823"},{"key":"17949_CR209","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/2FIJCNN.2012.6252784","volume-title":"Proceedings of the international joint conference on neural networks (IJCNN)","author":"A Garcez","year":"2012","unstructured":"Garcez A, Zaverucha G (2012) Multi-instance learning using recurrent neural networks. In: Proceedings of the international joint conference on neural networks (IJCNN), pp 1\u20136. https:\/\/doi.org\/10.1109\/2FIJCNN.2012.6252784"},{"key":"17949_CR210","doi-asserted-by":"publisher","first-page":"422","DOI":"10.1109\/ICCECE51280.2021.9342470","volume-title":"Proceedings of the IEEE international conference on consumer electronics and computer engineering (ICCECE)","author":"Y Yang","year":"2021","unstructured":"Yang Y, Liao Y, Ni S, Lin C (2021) Study of algorithm for aerial object detection based on lightweight neural network. In: Proceedings of the IEEE international conference on consumer electronics and computer engineering (ICCECE), pp 422\u2013426. https:\/\/doi.org\/10.1109\/ICCECE51280.2021.9342470"},{"issue":"8","key":"17949_CR211","doi-asserted-by":"publisher","first-page":"3195","DOI":"10.1109\/TNNLS.2021.3053249","volume":"33","author":"L Jiao","year":"2022","unstructured":"Jiao L, Zhang R, Liu F, Yang S, Hou B, Li L, Tang X (2022) New generation deep learning for video object detection: a survey. IEEE Transactions on Neural Networks and Learning Systems 33(8):3195\u20133215. https:\/\/doi.org\/10.1109\/TNNLS.2021.3053249","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"issue":"11","key":"17949_CR212","doi-asserted-by":"publisher","first-page":"139","DOI":"10.1145\/3422622","volume":"63","author":"IJ Goodfellow","year":"2014","unstructured":"Goodfellow IJ, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. Commun ACM 63(11):139\u2013144 https:\/\/dl.acm.org\/doi\/10.1145\/3422622","journal-title":"Commun ACM"},{"key":"17949_CR213","doi-asserted-by":"publisher","first-page":"3039","DOI":"10.1109\/CVPR.2017.324","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","author":"X Wang","year":"2017","unstructured":"Wang X, Shrivastava A, Gupta A (2017) A-fast-RCNN: hard positive generation via adversary for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3039\u20133048. https:\/\/doi.org\/10.1109\/CVPR.2017.324"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17949-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-023-17949-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-023-17949-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,8]],"date-time":"2024-07-08T17:38:10Z","timestamp":1720460290000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-023-17949-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,1,12]]},"references-count":213,"journal-issue":{"issue":"24","published-online":{"date-parts":[[2024,7]]}},"alternative-id":["17949"],"URL":"https:\/\/doi.org\/10.1007\/s11042-023-17949-4","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,1,12]]},"assertion":[{"value":"30 May 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"27 October 2023","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"17 December 2023","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 January 2024","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"We declare that we have no known competing financial interests or personal relationships that could have appeared to influence the work reported in this paper. We confirm that this work is original and has not been published elsewhere, nor is it currently under consideration for publication elsewhere.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interests"}}]}}