{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T17:16:03Z","timestamp":1780334163669,"version":"3.54.1"},"reference-count":69,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2022,3,24]],"date-time":"2022-03-24T00:00:00Z","timestamp":1648080000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,3,24]],"date-time":"2022-03-24T00:00:00Z","timestamp":1648080000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100004410","name":"T\u00fcrkiye Bilimsel ve Teknolojik Ara\u015ftirma Kurumu","doi-asserted-by":"publisher","award":["118C301"],"award-info":[{"award-number":["118C301"]}],"id":[{"id":"10.13039\/501100004410","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Multimed Info Retr"],"published-print":{"date-parts":[[2022,6]]},"DOI":"10.1007\/s13735-022-00229-6","type":"journal-article","created":{"date-parts":[[2022,3,25]],"date-time":"2022-03-25T04:40:28Z","timestamp":1648183228000},"page":"171-188","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":13,"title":["PDS-Net: A novel point and depth-wise separable convolution for real-time object detection"],"prefix":"10.1007","volume":"11","author":[{"ORCID":"https:\/\/orcid.org\/0000-0003-3592-4601","authenticated-orcid":false,"given":"Masum Shah","family":"Junayed","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9928-5776","authenticated-orcid":false,"given":"Md Baharul","family":"Islam","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Hassan","family":"Imani","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Tarkan","family":"Aydin","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,3,24]]},"reference":[{"issue":"07","key":"229_CR1","doi-asserted-by":"publisher","first-page":"1245","DOI":"10.1142\/S0218001409007624","volume":"23","author":"J Ning","year":"2009","unstructured":"Ning J, Zhang L, Zhang D, Wu C (2009) Robust object tracking using joint color-texture histogram. Int J Pattern Recognit Artif Intell 23(07):1245\u20131263","journal-title":"Int J Pattern Recognit Artif Intell"},{"key":"229_CR2","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: IEEE computer society conference on computer vision and pattern recognition (CVPR\u201905), vol 1. IEEE, pp 886\u2013893","DOI":"10.1109\/CVPR.2005.177"},{"issue":"4","key":"229_CR3","doi-asserted-by":"publisher","first-page":"219","DOI":"10.1007\/s13735-016-0107-6","volume":"5","author":"MR Mani","year":"2016","unstructured":"Mani MR, Potukuchi D, Satyanarayana C (2016) A novel approach for shape-based object recognition with curvelet transform. Int J Multimed Inf Retriev 5(4):219\u2013228","journal-title":"Int J Multimed Inf Retriev"},{"issue":"6","key":"229_CR4","first-page":"33","volume":"29","author":"EH Adelson","year":"1984","unstructured":"Adelson EH, Anderson CH, Bergen JR, Burt PJ, Ogden JM (1984) Pyramid methods in image processing. RCA Eng 29(6):33\u201341","journal-title":"RCA Eng"},{"issue":"2","key":"229_CR5","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/s13735-019-00171-0","volume":"8","author":"BT Bastian","year":"2019","unstructured":"Bastian BT, Jiji CV (2019) Pedestrian detection using first-and second-order aggregate channel features. Int J Multimed Inf Retriev 8(2):127\u2013133","journal-title":"Int J Multimed Inf Retriev"},{"key":"229_CR6","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: IEEE conference on computer vision and pattern recognition. IEEE, pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"issue":"3","key":"229_CR7","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M et al (2015) Imagenet large scale visual recognition challenge. Int J Comput Vision 115(3):211\u2013252","journal-title":"Int J Comput Vision"},{"key":"229_CR8","doi-asserted-by":"crossref","unstructured":"Geiger A, Lenz P, Urtasun R (2012) Are we ready for autonomous driving? the Kitti vision benchmark suite. In: 2012 IEEE conference on computer vision and pattern recognition. IEEE, pp 3354\u20133361","DOI":"10.1109\/CVPR.2012.6248074"},{"issue":"1","key":"229_CR9","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham M, Eslami SA, Van Gool L, Williams CK, Winn J, Zisserman A (2015) The pascal visual object classes challenge: a retrospective. Int J Comput Vision 111(1):98\u2013136","journal-title":"Int J Comput Vision"},{"key":"229_CR10","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Doll\u00e1r P, Zitnick CL (2014) Microsoft coco: common objects in context. In: European conference on computer vision. Springer, pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"229_CR11","doi-asserted-by":"publisher","first-page":"104117","DOI":"10.1016\/j.imavis.2021.104117","volume":"107","author":"R Solovyev","year":"2021","unstructured":"Solovyev R, Wang W, Gabruseva T (2021) Weighted boxes fusion: ensembling boxes from different object detection models. Image Vis Comput 107:104117","journal-title":"Image Vis Comput"},{"key":"229_CR12","doi-asserted-by":"publisher","first-page":"104099","DOI":"10.1016\/j.imavis.2021.104099","volume":"107","author":"C Shi","year":"2021","unstructured":"Shi C, Zhang W, Duan C, Chen H (2021) A pooling-based feature pyramid network for salient object detection. Image Vis Comput 107:104099","journal-title":"Image Vis Comput"},{"key":"229_CR13","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast r-cnn. In: Proceedings of the IEEE international conference on computer vision, pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"key":"229_CR14","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y, Berg AC (2016) Ssd: single shot multibox detector. In: European conference on computer vision. Springer, pp 21\u201337","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"229_CR15","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Doll\u00e1r P, Girshick R (2017) Mask R-CNN. In: Proceedings of the IEEE international conference on computer vision, pp 2961\u20132969","DOI":"10.1109\/ICCV.2017.322"},{"key":"229_CR16","unstructured":"Dai J, Li Y, He K, Sun J R-FCN: object detection via region-based fully convolutional networks. arXiv:1605.06409"},{"key":"229_CR17","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"key":"229_CR18","unstructured":"Redmon J, Farhadi A Yolov3: an incremental improvement. arXiv:1804.02767"},{"key":"229_CR19","doi-asserted-by":"crossref","unstructured":"Soviany P, Ionescu RT (2018) Optimizing the trade-off between single-stage and two-stage deep object detectors using image difficulty prediction. In: 20th international symposium on symbolic and numeric algorithms for scientific computing (SYNASC). IEEE, pp 209\u2013214","DOI":"10.1109\/SYNASC.2018.00041"},{"key":"229_CR20","doi-asserted-by":"publisher","first-page":"103911","DOI":"10.1016\/j.imavis.2020.103911","volume":"97","author":"S Wu","year":"2020","unstructured":"Wu S, Li X, Wang X (2020) IOU-aware single-stage object detector for accurate localization. Image Vis Comput 97:103911","journal-title":"Image Vis Comput"},{"key":"229_CR21","unstructured":"Ren S, He K, Girshick R, Sun J Faster R-CNN: Towards real-time object detection with region proposal networks. arXiv:1506.01497"},{"key":"229_CR22","doi-asserted-by":"crossref","unstructured":"Kong T, Sun F, Yao A, Liu H, Lu M, Chen Y (2017) Ron: Reverse connection with objectness prior networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5936\u20135944","DOI":"10.1109\/CVPR.2017.557"},{"key":"229_CR23","doi-asserted-by":"publisher","first-page":"8714","DOI":"10.1109\/ACCESS.2018.2801813","volume":"6","author":"BAG de Oliveira","year":"2018","unstructured":"de Oliveira BAG, Ferreira FMF, da Silva Martins CAP (2018) Fast and lightweight object detection network: detection and recognition on resource constrained devices. IEEE Access 6:8714\u20138724","journal-title":"IEEE Access"},{"key":"229_CR24","doi-asserted-by":"publisher","first-page":"144134","DOI":"10.1109\/ACCESS.2019.2945834","volume":"7","author":"D Wang","year":"2019","unstructured":"Wang D, Chen X, Yi H, Zhao F (2019) Improvement of non-maximum suppression in RGB-D object detection. IEEE Access 7:144134\u2013144143","journal-title":"IEEE Access"},{"key":"229_CR25","unstructured":"Bochkovskiy A, Wang C-Y, Liao H-YM Yolov4: Optimal speed and accuracy of object detection. arXiv:2004.10934"},{"key":"229_CR26","doi-asserted-by":"crossref","unstructured":"Chollet F (2017) Xception: Deep learning with depthwise separable convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1251\u20131258","DOI":"10.1109\/CVPR.2017.195"},{"issue":"6","key":"229_CR27","doi-asserted-by":"publisher","first-page":"1137","DOI":"10.1109\/TPAMI.2016.2577031","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren S, He K, Girshick R, Sun J (2016) Faster R-CNN: towards real-time object detection with region proposal networks. IEEE Trans Pattern Anal Mach Intell 39(6):1137\u20131149","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"229_CR28","doi-asserted-by":"crossref","unstructured":"Li Y, Chen Y, Wang N, Zhang Z (2019) Scale-aware trident networks for object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6054\u20136063","DOI":"10.1109\/ICCV.2019.00615"},{"key":"229_CR29","doi-asserted-by":"crossref","unstructured":"Najibi M, Singh B, Davis LS (2019) Autofocus: efficient multi-scale inference. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 9745\u20139755","DOI":"10.1109\/ICCV.2019.00984"},{"key":"229_CR30","doi-asserted-by":"crossref","unstructured":"Bell S, Zitnick CL, Bala K, Girshick R (2016) Inside-outside net: detecting objects in context with skip pooling and recurrent neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2874\u20132883","DOI":"10.1109\/CVPR.2016.314"},{"key":"229_CR31","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Doll\u00e1r P, Girshick R, He K, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 2117\u20132125","DOI":"10.1109\/CVPR.2017.106"},{"key":"229_CR32","doi-asserted-by":"crossref","unstructured":"Shrivastava A, Gupta A, Girshick R (2016) Training region-based object detectors with online hard example mining. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 761\u2013769","DOI":"10.1109\/CVPR.2016.89"},{"key":"229_CR33","doi-asserted-by":"crossref","unstructured":"Jiang B, Luo R, Mao J, Xiao T, Jiang Y (2018) Acquisition of localization confidence for accurate object detection. In: Proceedings of the European conference on computer vision (ECCV), pp 784\u2013799","DOI":"10.1007\/978-3-030-01264-9_48"},{"key":"229_CR34","doi-asserted-by":"crossref","unstructured":"Kong T, Yao A, Chen Y, Sun F (2016) Hypernet: towards accurate region proposal generation and joint object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 845\u2013853","DOI":"10.1109\/CVPR.2016.98"},{"key":"229_CR35","doi-asserted-by":"crossref","unstructured":"Cai Z, Fan Q, Feris R.\u00a0S, Vasconcelos N (2016) A unified multi-scale deep convolutional neural network for fast object detection. In: European conference on computer vision. Springer, pp 354\u2013370","DOI":"10.1007\/978-3-319-46493-0_22"},{"key":"229_CR36","doi-asserted-by":"crossref","unstructured":"Liu S, Qi L, Qin H, Shi J, Jia J (2018) Path aggregation network for instance segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 8759\u20138768","DOI":"10.1109\/CVPR.2018.00913"},{"key":"229_CR37","doi-asserted-by":"crossref","unstructured":"Zhou P, Ni B, Geng C, Hu J, Xu Y (2018) Scale-transferrable object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 528\u2013537","DOI":"10.1109\/CVPR.2018.00062"},{"key":"229_CR38","doi-asserted-by":"crossref","unstructured":"Zhao Q, Sheng T, Wang Y, Tang Z, Chen Y, Cai L, Ling H (2019) M2det: A single-shot object detector based on multi-level feature pyramid network. In: Proceedings of the AAAI conference on artificial intelligence, vol\u00a033, pp 9259\u20139266","DOI":"10.1609\/aaai.v33i01.33019259"},{"key":"229_CR39","doi-asserted-by":"crossref","unstructured":"Law H, Deng J (2018) Cornernet: detecting objects as paired keypoints. In: Proceedings of the European conference on computer vision (ECCV), pp 734\u2013750","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"229_CR40","doi-asserted-by":"crossref","unstructured":"Dai J, He K, Sun J (2016) Instance-aware semantic segmentation via multi-task network cascades. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3150\u20133158","DOI":"10.1109\/CVPR.2016.343"},{"issue":"2","key":"229_CR41","doi-asserted-by":"publisher","first-page":"119","DOI":"10.1007\/s11633-017-1053-3","volume":"14","author":"B Zhao","year":"2017","unstructured":"Zhao B, Feng J, Wu X, Yan S (2017) A survey on deep learning-based fine-grained object classification and semantic segmentation. Int J Autom Comput 14(2):119\u2013135","journal-title":"Int J Autom Comput"},{"key":"229_CR42","doi-asserted-by":"crossref","unstructured":"Vinyals O, Toshev A, Bengio S, Erhan D (2015) Show and tell: a neural image caption generator. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3156\u20133164","DOI":"10.1109\/CVPR.2015.7298935"},{"key":"229_CR43","doi-asserted-by":"crossref","unstructured":"Ghiasi G, Lin T-Y, Le QV (2019) Nas-fpn: learning scalable feature pyramid architecture for object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 7036\u20137045","DOI":"10.1109\/CVPR.2019.00720"},{"key":"229_CR44","doi-asserted-by":"crossref","unstructured":"Xu H, Yao L, Zhang W, Liang X, Li Z (2019) Auto-FPN: automatic network architecture adaptation for object detection beyond classification. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6649\u20136658","DOI":"10.1109\/ICCV.2019.00675"},{"key":"229_CR45","doi-asserted-by":"crossref","unstructured":"Tan M, Pang R, Le QV (2020) Efficientdet: scalable and efficient object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 10781\u201310790","DOI":"10.1109\/CVPR42600.2020.01079"},{"key":"229_CR46","doi-asserted-by":"publisher","DOI":"10.1088\/1742-6596\/1518\/1\/012042","volume":"1518","author":"W He","year":"2020","unstructured":"He W, Wu Y, Liang P, Hao G (2020) Using darts to improve mold id recognition model based on mask R-CNN. J Phys Conf Ser 1518:012042","journal-title":"J Phys Conf Ser"},{"key":"229_CR47","doi-asserted-by":"crossref","unstructured":"Huang Z, Huang L, Gong Y, Huang C, Wang X (2019) Mask scoring R-CNN. in: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 6409\u20136418","DOI":"10.1109\/CVPR.2019.00657"},{"key":"229_CR48","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) Yolo9000: better, faster, stronger. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 7263\u20137271","DOI":"10.1109\/CVPR.2017.690"},{"key":"229_CR49","unstructured":"Farhadi A, Redmon J Yolov3: An incremental improvement. Comput Vis Pattern Recognit cite as"},{"key":"229_CR50","doi-asserted-by":"crossref","unstructured":"Zhang Z, Qiao S, Xie C, Shen W, Wang B, Yuille AL (2018) Single-shot object detection with enriched semantics. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5813\u20135821","DOI":"10.1109\/CVPR.2018.00609"},{"key":"229_CR51","doi-asserted-by":"crossref","unstructured":"Liu Y, Li H, Yan J, Wei F, Wang X, Tang X (2017) Recurrent scale approximation for object detection in CNN. In: Proceedings of the IEEE international conference on computer vision, pp 571\u2013579","DOI":"10.1109\/ICCV.2017.69"},{"key":"229_CR52","doi-asserted-by":"crossref","unstructured":"Singh B, Davis LS (2018) An analysis of scale invariance in object detection snip. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 3578\u20133587","DOI":"10.1109\/CVPR.2018.00377"},{"key":"229_CR53","doi-asserted-by":"crossref","unstructured":"Wang G, Xiong Z, Liu D, Luo C (2018) Cascade mask generation framework for fast small object detection. In: 2018 IEEE international conference on multimedia and expo (ICME). IEEE, pp 1\u20136","DOI":"10.1109\/ICME.2018.8486561"},{"key":"229_CR54","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. In: Proceedings of the IEEE international conference on computer vision, pp 2980\u20132988","DOI":"10.1109\/ICCV.2017.324"},{"key":"229_CR55","unstructured":"Fu C-Y, Liu W, Ranga A, Tyagi A, Berg AC Dssd: deconvolutional single shot detector. arXiv:1701.06659"},{"key":"229_CR56","doi-asserted-by":"crossref","unstructured":"Li S, Yang L, Huang J, Hua X-S, Zhang L (2019) Dynamic anchor feature selection for single-shot object detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 6609\u20136618","DOI":"10.1109\/ICCV.2019.00671"},{"key":"229_CR57","doi-asserted-by":"crossref","unstructured":"Zhu Y, Zhao C, Wang J, Zhao X, Wu Y, Lu H (2017) Couplenet: coupling global structure with local parts for object detection. In: Proceedings of the IEEE international conference on computer vision, pp 4126\u20134134","DOI":"10.1109\/ICCV.2017.444"},{"key":"229_CR58","doi-asserted-by":"crossref","unstructured":"Duan K, Bai S, Xie L, Qi H, Huang Q, Tian Q (2019) Centernet: object detection with keypoint triplets. arXiv:1904.08189","DOI":"10.1109\/ICCV.2019.00667"},{"key":"229_CR59","doi-asserted-by":"crossref","unstructured":"Sun P, Zhang R, Jiang Y, Kong T, Xu C, Zhan W, Tomizuka M, Li L, Yuan Z, Wang C et al (2021) Sparse R-CNN: End-to-end object detection with learnable proposals. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 14454\u201314463","DOI":"10.1109\/CVPR46437.2021.01422"},{"key":"229_CR60","doi-asserted-by":"crossref","unstructured":"Li J, Cheng B, Feris R, Xiong J, Huang TS, Hwu W-M, Shi H (2021) Pseudo-IOU: improving label assignment in anchor-free object detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp 2378\u20132387","DOI":"10.1109\/CVPRW53098.2021.00270"},{"key":"229_CR61","doi-asserted-by":"publisher","first-page":"2708","DOI":"10.1109\/TIP.2020.3048630","volume":"30","author":"Y Li","year":"2021","unstructured":"Li Y, Pang Y, Cao J, Shen J, Shao L (2021) Improving single shot object detection with feature scale unmixing. IEEE Trans Image Process 30:2708\u20132721","journal-title":"IEEE Trans Image Process"},{"key":"229_CR62","doi-asserted-by":"crossref","unstructured":"Carion N, Massa F, Synnaeve G, Usunier N, Kirillov A, Zagoruyko S (2020) End-to-end object detection with transformers. In: European conference on computer vision. Springer, pp 213\u2013229","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"229_CR63","doi-asserted-by":"publisher","first-page":"18967","DOI":"10.1109\/ACCESS.2018.2814605","volume":"6","author":"B Li","year":"2018","unstructured":"Li B, He Y (2018) An improved resnet based on the adjustable shortcut connections. IEEE Access 6:18967\u201318974","journal-title":"IEEE Access"},{"key":"229_CR64","doi-asserted-by":"publisher","first-page":"103811","DOI":"10.1016\/j.imavis.2019.09.002","volume":"93","author":"A Mahmood","year":"2020","unstructured":"Mahmood A, Bennamoun M, An S, Sohel F, Boussaid F (2020) Resfeats: residual network based features for underwater image classification. Image Vis Comput 93:103811","journal-title":"Image Vis Comput"},{"key":"229_CR65","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T, Dehghani M, Minderer M, Heigold G, Gelly S et\u00a0al, An image is worth 16x16 words: Transformers for image recognition at scale. arXiv:2010.11929"},{"issue":"2","key":"229_CR66","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CK, Winn J, Zisserman A (2010) The pascal visual object classes (VOC) challenge. Int J Comput Vision 88(2):303\u2013338","journal-title":"Int J Comput Vision"},{"key":"229_CR67","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"229_CR68","unstructured":"Chen X, Li H, Wu Q, Meng F, Qiu H Bal-R2CNN: high quality recurrent object detection with balance optimization. IEEE Trans Multimed"},{"key":"229_CR69","doi-asserted-by":"crossref","unstructured":"Aziz L, FC MSBHS, Ayub S (2021) Multi-level refinement enriched feature pyramid network for object detection. Image Visi Comput 115:104287","DOI":"10.1016\/j.imavis.2021.104287"}],"container-title":["International Journal of Multimedia Information Retrieval"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-022-00229-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s13735-022-00229-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s13735-022-00229-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,5,6]],"date-time":"2022-05-06T17:06:56Z","timestamp":1651856816000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s13735-022-00229-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,3,24]]},"references-count":69,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2022,6]]}},"alternative-id":["229"],"URL":"https:\/\/doi.org\/10.1007\/s13735-022-00229-6","relation":{},"ISSN":["2192-6611","2192-662X"],"issn-type":[{"value":"2192-6611","type":"print"},{"value":"2192-662X","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,3,24]]},"assertion":[{"value":"27 September 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 March 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"13 March 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 March 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}