{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,21]],"date-time":"2026-07-21T14:57:31Z","timestamp":1784645851627,"version":"3.55.0"},"reference-count":297,"publisher":"Springer Science and Business Media LLC","issue":"33-34","license":[{"start":{"date-parts":[[2020,6,12]],"date-time":"2020-06-12T00:00:00Z","timestamp":1591920000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2020,6,12]],"date-time":"2020-06-12T00:00:00Z","timestamp":1591920000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"published-print":{"date-parts":[[2020,9]]},"DOI":"10.1007\/s11042-020-08976-6","type":"journal-article","created":{"date-parts":[[2020,6,12]],"date-time":"2020-06-12T16:02:47Z","timestamp":1591977767000},"page":"23729-23791","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":508,"title":["A review of object detection based on deep 
learning"],"prefix":"10.1007","volume":"79","author":[{"given":"Youzi","family":"Xiao","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3669-3748","authenticated-orcid":false,"given":"Zhiqiang","family":"Tian","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Jiachen","family":"Yu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yinshu","family":"Zhang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shuai","family":"Liu","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shaoyi","family":"Du","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xuguang","family":"Lan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2020,6,12]]},"reference":[{"issue":"11","key":"8976_CR1","doi-asserted-by":"crossref","first-page":"2189","DOI":"10.1109\/TPAMI.2012.28","volume":"34","author":"B Alexe","year":"2012","unstructured":"Alexe B, Deselaers T, Ferrari V (2012) Measuring the objectness of image windows. IEEE Transactions on Pattern Analysis and Machine Intelligence 34 (11):2189\u20132202","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"8976_CR2","unstructured":"Andreas G, Philip L, Raquel U (2012) Are we ready for autonomous driving? The KITTI vision benchmark suite. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3354\u20133361"},{"key":"8976_CR3","unstructured":"Anwar S, Sung W (2016) Coarse pruning of convolutional neural networks with random masks"},{"key":"8976_CR4","doi-asserted-by":"crossref","unstructured":"Arbelaez P, Pont-Tuset J, Barron JT, Marques F, Malik J (2014) Multiscale combinatorial grouping. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 328\u2013335","DOI":"10.1109\/CVPR.2014.49"},{"key":"8976_CR5","doi-asserted-by":"crossref","unstructured":"Bae SH (2019) Object detection based on region decomposition and assembly. In: Proceedings of the AAAI conference on artificial intelligence (AAAI)","DOI":"10.1609\/aaai.v33i01.33018094"},{"issue":"8","key":"8976_CR6","first-page":"1823","volume":"9","author":"PL Bartlett","year":"2008","unstructured":"Bartlett PL, Wegkamp MH (2008) Classification with a reject option using a hinge loss. J Mach Learn Res 9(8):1823\u20131840","journal-title":"J Mach Learn Res"},{"key":"8976_CR7","doi-asserted-by":"crossref","unstructured":"Bell S, Lawrence Zitnick C, Bala K, Girshick R (2016) Inside-outside net: detecting objects in context with skip pooling and recurrent neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2874\u20132883","DOI":"10.1109\/CVPR.2016.314"},{"issue":"8","key":"8976_CR8","doi-asserted-by":"crossref","first-page":"1798","DOI":"10.1109\/TPAMI.2013.50","volume":"35","author":"Y Bengio","year":"2013","unstructured":"Bengio Y, Courville A, Vincent P (2013) Representation learning: a review and new perspectives. 
IEEE Transactions on Pattern Analysis and Machine Intelligence 35(8):1798\u20131828","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"8976_CR9","doi-asserted-by":"crossref","unstructured":"Bertinetto L, Valmadre J, Henriques JF, Vedaldi A, Torr PH (2016) Fully-convolutional siamese networks for object tracking. In: European conference on computer vision (ECCV), pp 850\u2013865","DOI":"10.1007\/978-3-319-48881-3_56"},{"key":"8976_CR10","doi-asserted-by":"crossref","unstructured":"Bodla N, Singh B, Chellappa R, Davis LS (2017) Soft-NMS\u2013Improving object detection with one line of code. In: IEEE international conference on computer vision (ICCV), pp 5562\u20135570","DOI":"10.1109\/ICCV.2017.593"},{"key":"8976_CR11","unstructured":"Borji A, Cheng MM, Hou Q, Jiang H, Li J (2014) Salient object detection: a survey. Computational Visual Media, pp 1\u201334"},{"key":"8976_CR12","doi-asserted-by":"crossref","unstructured":"Bromley J, Guyon I, LeCun Y, S?ckinger E, Shah R (1994) Signature verification using a siamese time delay neural network. In: Advances in neural information processing systems (NIPS), pp 737\u2013744","DOI":"10.1142\/9789812797926_0003"},{"key":"8976_CR13","unstructured":"Caffe2 (2020) A new lightweight, modular, and scalable deep learning framework. https:\/\/caffe2.ai\/. Software available from caffe2.ai"},{"key":"8976_CR14","doi-asserted-by":"crossref","unstructured":"Cai Z, Fan Q, Feris RS, Vasconcelos N (2016) A unified multi-scale deep convolutional neural network for fast object detection. In: European conference on computer vision (ECCV), pp 354\u2013370","DOI":"10.1007\/978-3-319-46493-0_22"},{"key":"8976_CR15","doi-asserted-by":"crossref","unstructured":"Cai L, Zhao B, Wang Z, Lin J, Foo CS, Aly MS, Chandrasekhar V (2019) MaxpoolNMS: getting rid of NMS bottlenecks in two-stage object detectors. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 9356\u20139364","DOI":"10.1109\/CVPR.2019.00958"},{"key":"8976_CR16","doi-asserted-by":"crossref","unstructured":"Cai Z, Vasconcelos N (2018) Cascade r-cnn: delving into high quality object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6154\u20136162","DOI":"10.1109\/CVPR.2018.00644"},{"key":"8976_CR17","doi-asserted-by":"crossref","unstructured":"Cao G, Xie X, Yang W, Liao Q, Shi G, Wu J (2018) Feature-fused SSD: fast detection for small objects. In: Ninth international conference on graphic and image processing (ICGIP), p 106151","DOI":"10.1117\/12.2304811"},{"key":"8976_CR18","doi-asserted-by":"crossref","unstructured":"Caron M, Bojanowski P, Joulin A, Douze M (2018) Deep clustering for unsupervised learning of visual features. In: European conference on computer vision (ECCV), pp 139\u2013156","DOI":"10.1007\/978-3-030-01264-9_9"},{"key":"8976_CR19","doi-asserted-by":"crossref","unstructured":"Carreira J, Sminchisescu C (2011) CPMC: automatic object segmentation using constrained parametric min-cuts. IEEE Transactions on Pattern Analysis and Machine Intelligence (7): 1312\u20131328","DOI":"10.1109\/TPAMI.2011.231"},{"issue":"1","key":"8976_CR20","doi-asserted-by":"crossref","first-page":"41","DOI":"10.1023\/A:1007379606734","volume":"28","author":"R Caruana","year":"1997","unstructured":"Caruana R (1997) Multitask learning. Mach Learn 28(1):41\u201375","journal-title":"Mach Learn"},{"key":"8976_CR21","doi-asserted-by":"crossref","unstructured":"Castro FM, Marin-Jimenez MJ, Guil N, Schmid C, Alahari K (2018) End-to-end incremental learning. 
In: Proceedings of the European conference on computer vision (ECCV), pp 233\u2013248","DOI":"10.1007\/978-3-030-01258-8_15"},{"issue":"3","key":"8976_CR22","doi-asserted-by":"crossref","first-page":"1247","DOI":"10.5194\/gmd-7-1247-2014","volume":"7","author":"T Chai","year":"2014","unstructured":"Chai T, Draxler RR (2014) Root mean square error (RMSE) or mean absolute error (MAE)? Arguments against avoiding RMSE in the literature. Geosci Model Dev 7(3):1247\u20131250","journal-title":"Geosci Model Dev"},{"key":"8976_CR23","doi-asserted-by":"crossref","unstructured":"Chen X, Xiang S, Liu C-L, Pan C-H (2013) Vehicle detection in satellite images by parallel deep convolutional neural networks. In: Asian conference on pattern recognition (ACPR), pp 181\u2013185","DOI":"10.1109\/ACPR.2013.33"},{"key":"8976_CR24","doi-asserted-by":"crossref","unstructured":"Chen C, Seff A, Kornhauser A, Xiao J (2015) Deepdriving: learning affordance for direct perception in autonomous driving. In: Proceedings of the IEEE international conference on computer vision (CVPR), pp 2722\u20132730","DOI":"10.1109\/ICCV.2015.312"},{"key":"8976_CR25","unstructured":"Chen G, Choi W, Yu X, Han T, Chandraker M (2017) Learning efficient object detection models with knowledge distillation. In: Advances in neural information processing systems (NIPS), pp 742\u2013751"},{"key":"8976_CR26","doi-asserted-by":"crossref","unstructured":"Chen H, Wang Y, Wang G, Qiao Y (2018) LSTD: a low-shot transfer detector for object detection","DOI":"10.1609\/aaai.v32i1.11716"},{"key":"8976_CR27","unstructured":"Chen K, et al. (2020) Open MMLab Detection Toolbox (mmdetection). https:\/\/github.com\/open-mmlab\/mmdetection"},{"issue":"1","key":"8976_CR28","doi-asserted-by":"crossref","first-page":"13","DOI":"10.1109\/TPAMI.2014.2343217","volume":"37","author":"Q Chen","year":"2015","unstructured":"Chen Q, Song Z, Dong J, Huang Z, Hua Y, Yan S (2015) Contextualizing object detection and classification. 
IEEE Transactions on Pattern Analysis and Machine Intelligence 37(1):13\u201327","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"8976_CR29","doi-asserted-by":"crossref","unstructured":"Chen X, Gupta A (2017) Spatial memory for context reasoning in object detection. In: IEEE international conference on computer vision (ICCV), pp 4106\u20134116","DOI":"10.1109\/ICCV.2017.440"},{"key":"8976_CR30","unstructured":"Chen X, Kundu K, Zhu Y, Berneshawi AG, Ma H, Fidler S, Urtasun R (2015) 3d object proposals for accurate object class detection. In: Advances in neural information processing systems (NIPS), pp 424\u2013432"},{"key":"8976_CR31","doi-asserted-by":"crossref","unstructured":"Chen X, Ma H, Wan J, Li B, Xia T (2017) Multi-view 3d object detection network for autonomous driving. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6526\u20136534","DOI":"10.1109\/CVPR.2017.691"},{"key":"8976_CR32","doi-asserted-by":"crossref","unstructured":"Cheng B, Wei Y, Shi H, Feris R, Xiong J, Huang T (2018) Revisiting rcnn: on awakening the classification power of faster rcnn. In: Proceedings of the European conference on computer vision (ECCV), pp 453\u2013468","DOI":"10.1007\/978-3-030-01267-0_28"},{"issue":"12","key":"8976_CR33","doi-asserted-by":"crossref","first-page":"7405","DOI":"10.1109\/TGRS.2016.2601622","volume":"54","author":"G Cheng","year":"2016","unstructured":"Cheng G, Zhou P, Han J (2016) Learning rotation-invariant convolutional neural networks for object detection in VHR optical remote sensing images. IEEE Trans Geosci Remote Sens 54(12):7405\u20137415","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"8976_CR34","doi-asserted-by":"crossref","unstructured":"Chollet F (2017) Xception: deep learning with depthwise separable convolutions. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1800\u20131807","DOI":"10.1109\/CVPR.2017.195"},{"key":"8976_CR35","doi-asserted-by":"crossref","unstructured":"Cordts M, Omran M, Ramos S, Rehfeld T, Enzweiler M, Benenson R, Franke U, Roth S, Schiele B (2016) The cityscapes dataset for semantic urban scene understanding. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3213\u20133223","DOI":"10.1109\/CVPR.2016.350"},{"issue":"3","key":"8976_CR36","first-page":"273","volume":"20","author":"C Cortes","year":"1995","unstructured":"Cortes C, Vapnik V (1995) Support vector machine. Mach Learn 20(3):273\u2013297","journal-title":"Mach Learn"},{"key":"8976_CR37","doi-asserted-by":"crossref","unstructured":"Dai J, He K, Sun J (2016) Instance-aware semantic segmentation via multi-task network cascades. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3150\u20133158","DOI":"10.1109\/CVPR.2016.343"},{"key":"8976_CR38","unstructured":"Dai J, Li Y, He K, Sun J (2016) R-FCN: object detection via region-based fully convolutional networks. In: Advances in neural information processing systems (NIPS), pp 379\u2013387"},{"key":"8976_CR39","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR"},{"key":"8976_CR40","doi-asserted-by":"crossref","unstructured":"Dalal N, Triggs B (2005) Histograms of oriented gradients for human detection. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 886\u2013893","DOI":"10.1109\/CVPR.2005.177"},{"issue":"1","key":"8976_CR41","doi-asserted-by":"crossref","first-page":"19","DOI":"10.1007\/s10479-005-5724-z","volume":"134","author":"P-T De Boer","year":"2005","unstructured":"De Boer P-T, Kroese DP, Mannor S, Rubinstein RY (2005) A tutorial on the cross-entropy method. Ann Oper Res 134(1):19\u201367","journal-title":"Ann Oper Res"},{"key":"8976_CR42","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li L-J, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"8976_CR43","unstructured":"Denton E, Zaremba W, Bruna J, LeCun Y, Fergus R (2014) Exploiting linear structure within convolutional networks for efficient evaluation. In: Advances in neural information processing systems (NIPS), pp 1269\u20131277"},{"key":"8976_CR44","doi-asserted-by":"crossref","unstructured":"Divvala SK, Hoiem D, Hays JH, Efros AA, Hebert M (2009) An empirical study of context in object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1271\u20131278","DOI":"10.1109\/CVPRW.2009.5206532"},{"key":"8976_CR45","doi-asserted-by":"crossref","unstructured":"Dollar P, Wojek C, Schiele B, Perona P (2009) Pedestrian detection: a benchmark. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 304\u2013311","DOI":"10.1109\/CVPRW.2009.5206631"},{"issue":"4","key":"8976_CR46","doi-asserted-by":"crossref","first-page":"743","DOI":"10.1109\/TPAMI.2011.155","volume":"34","author":"P Dollar","year":"2012","unstructured":"Dollar P, Wojek C, Schiele B, Perona P (2012) Pedestrian detection: an evaluation of the state of the art. 
IEEE Transactions on Pattern Analysis and Machine Intelligence 34(4):743\u2013761","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"8976_CR47","doi-asserted-by":"crossref","unstructured":"Duan K, Bai S, Xie L, Qi H, Huang Q, Tian Q (2019) Centernet: keypoint triplets for object detection. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 6569\u20136578","DOI":"10.1109\/ICCV.2019.00667"},{"key":"8976_CR48","unstructured":"Dundar A, Jin J, Culurciello E (2016) Convolutional clustering for unsupervised learning. In: International conference on learning representations (ICLR"},{"key":"8976_CR49","doi-asserted-by":"crossref","unstructured":"Endres I, Hoiem D (2010) Category independent object proposals. In: European conference on computer vision (ECCV), pp 575\u2013588","DOI":"10.1007\/978-3-642-15555-0_42"},{"key":"8976_CR50","doi-asserted-by":"crossref","unstructured":"Erhan D, Szegedy C, Toshev A, Anguelov D (2014) Scalable object detection using deep neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2147\u20132154","DOI":"10.1109\/CVPR.2014.276"},{"issue":"1","key":"8976_CR51","doi-asserted-by":"crossref","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham M, Eslami SA, Van Gool L, Williams CK, Winn J, Zisserman A (2015) The pascal visual object classes challenge: a retrospective. Int J Comput Vis 111(1):98\u2013136","journal-title":"Int J Comput Vis"},{"issue":"2","key":"8976_CR52","doi-asserted-by":"crossref","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham M, Van Gool L, Williams CK, Winn J, Zisserman A (2010) The pascal visual object classes (voc) challenge. 
Int J Comput Vis 88(2):303\u2013338","journal-title":"Int J Comput Vis"},{"key":"8976_CR53","unstructured":"Facebook Al Research (2020) FAIR\u2019s research platform for object detection research (Detectron). https:\/\/github.com\/facebookresearch\/Detectron"},{"key":"8976_CR54","doi-asserted-by":"crossref","unstructured":"Fan Q, Brown L, Smith J (2016) A closer look at faster R-CNN for vehicle detection. In: IEEE intelligent vehicles symposium (IV), pp 124\u2013129","DOI":"10.1109\/IVS.2016.7535375"},{"issue":"4","key":"8976_CR55","doi-asserted-by":"crossref","first-page":"594","DOI":"10.1109\/TPAMI.2006.79","volume":"28","author":"L Fei-Fei","year":"2006","unstructured":"Fei-Fei L, Fergus R, Perona P (2006) One-shot learning of object categories. IEEE Transactions on Pattern Analysis and Machine Intelligence 28(4):594\u2013611","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"1","key":"8976_CR56","doi-asserted-by":"crossref","first-page":"59","DOI":"10.1016\/j.cviu.2005.09.012","volume":"106","author":"L Fei-Fei","year":"2007","unstructured":"Fei-Fei L, Fergus R, Perona P (2007) Learning generative visual models from few training examples: an incremental bayesian approach tested on 101 object categories. Computer Vision Image Understanding 106(1):59\u201370","journal-title":"Computer Vision Image Understanding"},{"key":"8976_CR57","doi-asserted-by":"crossref","unstructured":"Felzenszwalb P, McAllester D, Ramanan D (2008) A discriminatively trained, multiscale, deformable part model. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1\u20138","DOI":"10.1109\/CVPR.2008.4587597"},{"issue":"9","key":"8976_CR58","doi-asserted-by":"crossref","first-page":"1627","DOI":"10.1109\/TPAMI.2009.167","volume":"32","author":"PF Felzenszwalb","year":"2010","unstructured":"Felzenszwalb PF, Girshick RB, McAllester D, Ramanan D (2010) Object detection with discriminatively trained part-based models. IEEE Transactions on Pattern Analysis and Machine Intelligence 32(9):1627\u20131645","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"8976_CR59","unstructured":"Fu C-Y, Liu W, Ranga A, Tyagi A, Berg AC (2017) DSSD: deconvolutional single shot detector. arXiv:170106659"},{"key":"8976_CR60","doi-asserted-by":"crossref","unstructured":"Gao M, Yu R, Li A, Morariu VI, Davis LS (2018) Dynamic zoom-in network for fast object detection in large images. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6926\u20136935","DOI":"10.1109\/CVPR.2018.00724"},{"key":"8976_CR61","unstructured":"Gentile C, Warmuth MK (1999) Linear hinge loss and average margin. In: Advances in neural information processing systems (NIPS), pp 225\u2013231"},{"key":"8976_CR62","doi-asserted-by":"crossref","unstructured":"Ghiasi G, Lin TY, Le QV (2019) Nas-fpn: learning scalable feature pyramid architecture for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 7036\u20137045","DOI":"10.1109\/CVPR.2019.00720"},{"key":"8976_CR63","doi-asserted-by":"crossref","unstructured":"Ghodrati A, Diba A, Pedersoli M, Tuytelaars T, Van Gool L (2015) Deepproposal: hunting objects by cascading deep convolutional layers. 
In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 2578\u20132586","DOI":"10.1109\/ICCV.2015.296"},{"key":"8976_CR64","doi-asserted-by":"crossref","unstructured":"Gholami A, Kwon K, Wu B, Tai Z, Yue X, Jin P, Zhao S, Keutzer K (2018) SqueezeNext: hardware-aware neural network design. In: IEEE conference on computer vision and pattern recognition workshops (CVPRW), pp 1638\u20131647","DOI":"10.1109\/CVPRW.2018.00215"},{"key":"8976_CR65","doi-asserted-by":"crossref","unstructured":"Girshick R (2015) Fast R-CNN. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 1440\u20131448","DOI":"10.1109\/ICCV.2015.169"},{"key":"8976_CR66","doi-asserted-by":"crossref","unstructured":"Girshick R, Donahue J, Darrell T, Malik J (2014) Rich feature hierarchies for accurate object detection and semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 580\u2013587","DOI":"10.1109\/CVPR.2014.81"},{"key":"8976_CR67","unstructured":"Goodfellow IJ, Pouget-Abadie J, Mirza M, Xu B, Warde-Farley D, Ozair S, Courville A, Bengio Y (2014) Generative adversarial nets. In: Advances in neural information processing systems (NIPS), pp 2672\u20132680"},{"key":"8976_CR68","unstructured":"Gordon RS, Perez M (2018) Safety for wearable virtual reality devices via object detection and tracking. US Patent Application"},{"key":"8976_CR69","doi-asserted-by":"crossref","unstructured":"Graves A, Mohamed A-R, Hinton G (2013) Speech recognition with deep recurrent neural networks. In: IEEE international conference on acoustics, speech and signal processing (ICASSP), pp 6645\u20136649","DOI":"10.1109\/ICASSP.2013.6638947"},{"key":"8976_CR70","unstructured":"Griffin G, Holub A, Perona P (2007) Caltech-256 object category dataset. 
In: Technical Report of California Institute"},{"key":"8976_CR71","doi-asserted-by":"crossref","unstructured":"Gu C, Sun C, Ross D, Vondrick C, Pantofaru C, Li Y, Vijayanarasimhan S, Toderici G, Ricco S, Sukthankar R (2018) AVA: a video dataset of spatio-temporally localized atomic visual actions. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6047\u20136056","DOI":"10.1109\/CVPR.2018.00633"},{"key":"8976_CR72","doi-asserted-by":"crossref","unstructured":"Gupta S, Girshick R, Arbelaez P, Malik J (2014) Learning rich features from RGB-D images for object detection and segmentation. In: European conference on computer vision (ECCV), pp 345\u2013360","DOI":"10.1007\/978-3-319-10584-0_23"},{"key":"8976_CR73","unstructured":"Han S, Mao H, Dally WJ (2016) Deep compression: compressing deep neural networks with pruning trained quantization and huffman coding. In: International conference on learning representations (ICLR)"},{"key":"8976_CR74","doi-asserted-by":"crossref","unstructured":"Hao Y, Fu Y, Jiang YG, Tian Q (2019, July) An end-to-end architecture for class-incremental object detection with knowledge distillation. In: IEEE international conference on multimedia and expo (ICME), pp 1\u20136","DOI":"10.1109\/ICME.2019.00009"},{"key":"8976_CR75","doi-asserted-by":"crossref","unstructured":"Hao Z, Liu Y, Qin H, Yan J, Li X, Hu X (2017) Scale-aware face detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6186\u20136195","DOI":"10.1109\/CVPR.2017.207"},{"key":"8976_CR76","doi-asserted-by":"crossref","unstructured":"Hariharan B, Arbelaez P, Girshick R, Malik J (2017) Object instance segmentation and fine-grained localization using hypercolumns. 
IEEE Transactions on Pattern Analysis and Machine Intelligence (4): 627\u2013639","DOI":"10.1109\/TPAMI.2016.2578328"},{"key":"8976_CR77","doi-asserted-by":"crossref","unstructured":"Harzallah H, Jurie F, Schmid C (2009) Combining efficient object localization and image classification. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 237\u2013244","DOI":"10.1109\/ICCV.2009.5459257"},{"key":"8976_CR78","doi-asserted-by":"crossref","unstructured":"Hastie T, Tibshirani R, Friedman J (2009) Unsupervised learning. In: The elements of statistical learning. Springer, Berlin, pp 485\u2013585","DOI":"10.1007\/978-0-387-84858-7_14"},{"key":"8976_CR79","doi-asserted-by":"crossref","unstructured":"He K, Girshick R, Dollar P (2018) Rethinking ImageNet Pre-training. arXiv:181108883","DOI":"10.1109\/ICCV.2019.00502"},{"key":"8976_CR80","doi-asserted-by":"crossref","unstructured":"He K, Gkioxari G, Dollar P, Girshick R (2017) Mask R-CNN. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 2980\u20132988","DOI":"10.1109\/ICCV.2017.322"},{"key":"8976_CR81","doi-asserted-by":"crossref","unstructured":"He KM, Zhang XY, Ren SQ, Sun J (2014) Spatial pyramid pooling in deep convolutional networks for visual recognition. In: European conference on computer vision (ECCV), pp 346\u2013361","DOI":"10.1007\/978-3-319-10578-9_23"},{"key":"8976_CR82","doi-asserted-by":"crossref","unstructured":"He KM, Zhang XY, Ren SQ, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"issue":"3","key":"8976_CR83","doi-asserted-by":"crossref","first-page":"330","DOI":"10.1007\/s11263-015-0822-0","volume":"115","author":"S He","year":"2015","unstructured":"He S, Lau RW, Liu W, Huang Z, Yang Q (2015) Supercnn: a superpixelwise convolutional neural network for salient object detection. 
Int J Comput Vis 115(3):330\u2013344","journal-title":"Int J Comput Vis"},{"key":"8976_CR84","doi-asserted-by":"crossref","unstructured":"He Y, Zhang X, Savvides M, Kitani K (2018) Softer-NMS: rethinking bounding box regression for accurate object detection. arXiv:180908545","DOI":"10.1109\/CVPR.2019.00300"},{"key":"8976_CR85","unstructured":"Hetang C, Qin H, Liu S, Yan J (2017) Impression network for video object detection. arXiv:171205896"},{"issue":"8","key":"8976_CR86","doi-asserted-by":"crossref","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter S, Schmidhuber J (1997) Long short-term memory. Neural Comput 9(8):1735\u20131780","journal-title":"Neural Comput"},{"issue":"5","key":"8976_CR87","first-page":"2403","volume":"46","author":"SC Hoi","year":"2015","unstructured":"Hoi SC, Wu X, Liu H, Wu Y, Wang H, Xue H, Wu Q (2015) Logo-net: large-scale deep logo detection and brand recognition with deep region-based convolutional networks. IEEE Transactions on Pattern Analysis and Machine Intelligence 46(5):2403\u20132412","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"8976_CR88","unstructured":"Howard AG, Zhu M, Chen B, Kalenichenko D, Wang W, Weyand T, Andreetto M, Adam H (2017) Mobilenets: efficient convolutional neural networks for mobile vision applications. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)"},{"key":"8976_CR89","doi-asserted-by":"crossref","unstructured":"Hu J, Shen L, Sun G (2018) Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 7132\u20137141","DOI":"10.1109\/CVPR.2018.00745"},{"key":"8976_CR90","doi-asserted-by":"crossref","unstructured":"Hu P, Ramanan D (2017) Finding tiny faces. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 951\u2013959","DOI":"10.1109\/CVPR.2017.166"},{"key":"8976_CR91","doi-asserted-by":"crossref","unstructured":"Hu P, Shuai B, Liu J, Wang G (2017) Deep level sets for salient object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2300\u20132309","DOI":"10.1109\/CVPR.2017.65"},{"key":"8976_CR92","doi-asserted-by":"crossref","unstructured":"Huang G, Liu Z, van der Maaten L, Weinberger KQ (2017) Densely connected convolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2261\u20132269","DOI":"10.1109\/CVPR.2017.243"},{"key":"8976_CR93","doi-asserted-by":"crossref","unstructured":"Huang J, Rathod V, Sun C, Zhu ML, Korattikara A, Fathi A, Fischer I, Wojna Z, Song Y, Guadarrama S, Murphy K (2017) Speed\/accuracy trade-offs for modern convolutional object detectors. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3296\u20133297","DOI":"10.1109\/CVPR.2017.351"},{"key":"8976_CR94","doi-asserted-by":"crossref","unstructured":"Huang W, Qiao Y, Tang X (2014) Robust scene text detection with convolution neural network induced mser trees. In: European conference on computer vision (ECCV), pp 497\u2013511","DOI":"10.1007\/978-3-319-10593-2_33"},{"key":"8976_CR95","doi-asserted-by":"crossref","unstructured":"Hwang S, Kim HE (2016) Self-transfer learning for weakly supervised lesion localization. In: International conference on medical image computing and computer-assisted intervention, pp 239\u2013246","DOI":"10.1007\/978-3-319-46723-8_28"},{"key":"8976_CR96","unstructured":"Iandola FN, Han S, Moskewicz MW, Ashraf K, Dally WJ, Keutzer K (2017) Squeezenet: alexnet-level accuracy with 50x fewer parameters and < 0.5 mb model size. 
In: International conference on learning representations (ICLR)"},{"key":"8976_CR97","unstructured":"Ioffe S, Szegedy C (2015) Batch normalization: accelerating deep network training by reducing internal covariate shift. In: International conference on machine learning (ICML), pp 448\u2013456"},{"key":"8976_CR98","doi-asserted-by":"crossref","unstructured":"Itti L, Koch C, Niebur E (1998) A model of saliency-based visual attention for rapid scene analysis. IEEE Transactions on Pattern Analysis and Machine Intelligence (11): 1254\u20131259","DOI":"10.1109\/34.730558"},{"key":"8976_CR99","doi-asserted-by":"crossref","unstructured":"Janocha K, Czarnecki WM (2017) On loss functions for deep neural networks in classification. arXiv:170205659","DOI":"10.4467\/20838476SI.16.004.6185"},{"key":"8976_CR100","doi-asserted-by":"crossref","unstructured":"Jiang B, Luo R, Mao J, Xiao T, Jiang Y (2018) Acquisition of localization confidence for accurate object detection. In: European conference on computer vision (ECCV), pp 8\u201314","DOI":"10.1007\/978-3-030-01264-9_48"},{"key":"8976_CR101","doi-asserted-by":"crossref","unstructured":"Jiang H, Learned-Miller E (2017) Face detection with the faster R-CNN. In: IEEE international conference on automatic face and gesture recognition, pp 650\u2013657","DOI":"10.1109\/FG.2017.82"},{"issue":"10","key":"8976_CR102","doi-asserted-by":"crossref","first-page":"2896","DOI":"10.1109\/TCSVT.2017.2736553","volume":"28","author":"K Kang","year":"2018","unstructured":"Kang K, Li H, Yan J, Zeng X, Yang B, Xiao T, Zhang C, Wang Z, Wang R, Wang X (2018) T-cnn: tubelets with convolutional neural networks for object detection from videos. 
IEEE Transactions on Circuits Systems for Video Technology 28(10):2896\u20132907","journal-title":"IEEE Transactions on Circuits Systems for Video Technology"},{"key":"8976_CR103","doi-asserted-by":"crossref","unstructured":"Kang K, Ouyang W, Li H, Wang X (2016) Object detection from video tubelets with convolutional neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 817\u2013825","DOI":"10.1109\/CVPR.2016.95"},{"key":"8976_CR104","doi-asserted-by":"crossref","unstructured":"Karpathy A, Toderici G, Shetty S, Leung T, Sukthankar R, Fei-Fei L (2014) Large-scale video classification with convolutional neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1725\u20131732","DOI":"10.1109\/CVPR.2014.223"},{"key":"8976_CR105","unstructured":"Kavukcuoglu K, Sermanet P, Boureau Y-L, Gregor K, Mathieu M, Cun YL (2010) Learning convolutional feature hierarchies for visual recognition. In: Advances in neural information processing systems (NIPS), pp 1090\u20131098"},{"key":"8976_CR106","doi-asserted-by":"crossref","unstructured":"Kawahara J, Hamarneh G (2016) Multi-resolution-tract CNN with hybrid pretrained and skin-lesion trained layers. In: International workshop on machine learning in medical imaging, pp 164\u2013171","DOI":"10.1007\/978-3-319-47157-0_20"},{"key":"8976_CR107","doi-asserted-by":"crossref","unstructured":"Kim S-W, Kook H-K, Sun J-Y, Kang M-C, Ko S-J (2018) Parallel feature pyramid network for object detection. In: European conference on computer vision (ECCV), pp 234\u2013250","DOI":"10.1007\/978-3-030-01228-1_15"},{"key":"8976_CR108","doi-asserted-by":"crossref","unstructured":"Kleban J, Xie X, Ma W-Y (2008) Spatial pyramid mining for logo detection in natural scenes. 
In: IEEE international conference on multimedia and expo (ICME), pp 1077\u20131080","DOI":"10.1109\/ICME.2008.4607625"},{"key":"8976_CR109","unstructured":"Koch G, Zemel R, Salakhutdinov R (2015) Siamese neural networks for one-shot image recognition. In: International conference on machine learning (ICML)"},{"key":"8976_CR110","doi-asserted-by":"crossref","unstructured":"Kong B, Zhan Y, Shin M, Denny T, Zhang S (2016) Recognizing end-diastole and end-systole frames via deep temporal regression network. In: International conference on medical image computing and computer-assisted intervention, pp 264\u2013272","DOI":"10.1007\/978-3-319-46726-9_31"},{"key":"8976_CR111","doi-asserted-by":"crossref","unstructured":"Kong T, Sun F, Yao A, Liu H, Lu M, Chen Y (2017) Ron: reverse connection with objectness prior networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 5244\u20135252","DOI":"10.1109\/CVPR.2017.557"},{"key":"8976_CR112","doi-asserted-by":"crossref","unstructured":"Kong T, Yao A, Chen Y, Sun F (2016) Hypernet: towards accurate region proposal generation and joint object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 845\u2013853","DOI":"10.1109\/CVPR.2016.98"},{"key":"8976_CR113","unstructured":"Krasin I, Duerig T, Alldrin N, Veit A, Abu-El-Haija S, Belongie S, Cai D, Feng Z, Ferrari V, Gomes V (2016) Openimages: a public dataset for large-scale multi-label and multi-class image classification. Dataset available from https:\/\/github.com\/openimages 2(6):7"},{"key":"8976_CR114","doi-asserted-by":"crossref","unstructured":"Kr\u00e4henb\u00fchl P, Koltun V (2014) Geodesic object proposals. In: European conference on computer vision (ECCV), pp 725\u2013739","DOI":"10.1007\/978-3-319-10602-1_47"},{"key":"8976_CR115","unstructured":"Krizhevsky A, Hinton G (2009) Learning multiple layers of features from tiny images. 
In: Technical Report of University of Toronto"},{"issue":"6","key":"8976_CR116","doi-asserted-by":"crossref","first-page":"84","DOI":"10.1145\/3065386","volume":"60","author":"A Krizhevsky","year":"2017","unstructured":"Krizhevsky A, Sutskever I, Hinton GE (2017) Imagenet classification with deep convolutional neural networks. Commun ACM 60(6):84\u201390","journal-title":"Commun ACM"},{"key":"8976_CR117","doi-asserted-by":"crossref","unstructured":"Kuo W, Hariharan B, Malik J (2015) Deepbox: learning objectness with convolutional networks. In: Proceedings of the IEEE international conference on computer vision (CVPR), pp 2479\u20132487","DOI":"10.1109\/ICCV.2015.285"},{"key":"8976_CR118","doi-asserted-by":"crossref","unstructured":"Lai K, Bo L, Ren X, Fox D (2011) A large-scale hierarchical multi-view rgb-d object dataset. In: IEEE international conference on robotics and automation (ICRA), pp 1817\u20131824","DOI":"10.1109\/ICRA.2011.5980382"},{"key":"8976_CR119","unstructured":"LaLonde R, Bagci U (2018) Capsules for object segmentation. arXiv:1804.04241"},{"key":"8976_CR120","doi-asserted-by":"crossref","unstructured":"Law H, Deng J (2018) Cornernet: detecting objects as paired keypoints. In: Proceedings of the European conference on computer vision (ECCV), pp 734\u2013750","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"8976_CR121","doi-asserted-by":"crossref","unstructured":"Lazebnik S, Schmid C, Ponce J (2006) Beyond bags of features: spatial pyramid matching for recognizing natural scene categories. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2169\u20132178","DOI":"10.1109\/CVPR.2006.68"},{"issue":"7553","key":"8976_CR122","doi-asserted-by":"crossref","first-page":"436","DOI":"10.1038\/nature14539","volume":"521","author":"Y LeCun","year":"2015","unstructured":"LeCun Y, Bengio Y, Hinton G (2015) Deep learning. 
Nature 521 (7553):436\u2013444","journal-title":"Nature"},{"key":"8976_CR123","doi-asserted-by":"crossref","unstructured":"Lecun Y, Bottou L, Bengio Y, Haffner P (1998) Gradient-based learning applied to document recognition. In: Proceedings of the IEEE, pp 2278\u20132324","DOI":"10.1109\/5.726791"},{"key":"8976_CR124","unstructured":"Li G, Yu Y (2015) Visual saliency based on multiscale deep features. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 5455\u20135463"},{"key":"8976_CR125","unstructured":"Li H, Kadav A, Durdanovic I, Samet H, Graf HP (2016) Pruning filters for efficient convnets. arXiv:1608.08710"},{"key":"8976_CR126","doi-asserted-by":"crossref","unstructured":"Li H, Lin Z, Shen X, Brandt J, Hua G (2015) A convolutional neural network cascade for face detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 5325\u20135334","DOI":"10.1109\/CVPR.2015.7299170"},{"issue":"4","key":"8976_CR127","first-page":"985","volume":"20","author":"J Li","year":"2017","unstructured":"Li J, Liang X, Shen S, Xu T, Feng J, Yan S (2017) Scale-aware fast R-CNN for pedestrian detection. IEEE Transactions on Multimedia 20(4):985\u2013996","journal-title":"IEEE Transactions on Multimedia"},{"key":"8976_CR128","doi-asserted-by":"crossref","unstructured":"Li J, Liang X, Wei Y, Xu T, Feng J, Yan S (2017) Perceptual generative adversarial networks for small object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1222\u20131230","DOI":"10.1109\/CVPR.2017.211"},{"issue":"5","key":"8976_CR129","doi-asserted-by":"crossref","first-page":"944","DOI":"10.1109\/TMM.2016.2642789","volume":"19","author":"J Li","year":"2017","unstructured":"Li J, Wei Y, Liang X, Dong J, Xu T, Feng J, Yan S (2017) Attentive contexts for object detection. 
IEEE Transactions on Multimedia 19(5):944\u2013954","journal-title":"IEEE Transactions on Multimedia"},{"issue":"4","key":"8976_CR130","doi-asserted-by":"crossref","first-page":"2337","DOI":"10.1109\/TGRS.2017.2778300","volume":"56","author":"K Li","year":"2017","unstructured":"Li K, Cheng G, Bu S, You X (2017) Rotation-insensitive and context-augmented object detection in remote sensing images. IEEE Trans Geosci Remote Sens 56(4):2337\u20132348","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"8976_CR131","doi-asserted-by":"crossref","unstructured":"Li L, Xu M, Wang X, Jiang L, Liu H (2019) Attention based glaucoma detection: a large-scale database and CNN model. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 10571\u201310580","DOI":"10.1109\/CVPR.2019.01082"},{"key":"8976_CR132","doi-asserted-by":"crossref","unstructured":"Li S, Yang L, Huang J, Hua XS, Zhang L (2019) Dynamic anchor feature selection for single-shot object detection. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 6609\u20136618","DOI":"10.1109\/ICCV.2019.00671"},{"key":"8976_CR133","doi-asserted-by":"crossref","unstructured":"Li Y, Chen Y, Wang N, Zhang Z (2019) Scale-aware trident networks for object detection. In: Proceedings of the IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2019.00615"},{"key":"8976_CR134","doi-asserted-by":"crossref","unstructured":"Li Y, Wang D, Hu H, Lin Y, Zhuang Y (2017) Zero-shot recognition using dual visual-semantic mapping paths. In: Proceedings of the IEEE international conference on computer vision (CVPR), pp 5207\u20135215","DOI":"10.1109\/CVPR.2017.553"},{"key":"8976_CR135","doi-asserted-by":"crossref","unstructured":"Li Z, Peng C, Yu G, Zhang X, Deng Y, Sun J (2018) DetNet: design backbone for object detection. 
In: European conference on computer vision (ECCV), pp 334\u2013350","DOI":"10.1007\/978-3-030-01240-3_21"},{"key":"8976_CR136","unstructured":"Li Z, Peng C, Yu G, Zhang X, Deng Y, Sun J (2018) Light-head R-CNN: in defense of two-stage object detector. In: Proceedings of the IEEE international conference on computer vision (CVPR)"},{"key":"8976_CR137","unstructured":"Li Z, Zhou F (2017) FSSD: feature fusion single shot multibox detector. arXiv:1712.00960"},{"key":"8976_CR138","doi-asserted-by":"crossref","unstructured":"Liang M, Hu X (2015) Recurrent convolutional neural network for object recognition. In: Proceedings of the IEEE international conference on computer vision (CVPR), pp 3367\u20133375","DOI":"10.1109\/CVPR.2015.7298958"},{"issue":"5","key":"8976_CR139","doi-asserted-by":"crossref","first-page":"947","DOI":"10.21629\/JSEE.2018.05.07","volume":"29","author":"L Liangkui","year":"2018","unstructured":"Liangkui L, Shaoyou W, Zhongxing T (2018) Using deep learning to detect small targets in infrared oversampling images. J Syst Eng Electron 29(5):947\u2013952","journal-title":"J Syst Eng Electron"},{"key":"8976_CR140","doi-asserted-by":"crossref","unstructured":"Lienhart R, Maydt J (2002) An extended set of haar-like features for rapid object detection. In: Proceedings of the international conference on image processing (ICIP), pp 1\u20131","DOI":"10.1109\/ICIP.2002.1038171"},{"key":"8976_CR141","unstructured":"Lin M, Chen Q, Yan S (2014) Network in network. In: International conference on learning representations (ICLR)"},{"key":"8976_CR142","doi-asserted-by":"crossref","unstructured":"Lin T, Goyal P, Girshick R, He K, Doll\u00e1r P (2017) Focal loss for dense object detection. 
In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 2999\u20133007","DOI":"10.1109\/ICCV.2017.324"},{"key":"8976_CR143","doi-asserted-by":"crossref","unstructured":"Lin T-Y, Maire M, Belongie S, Hays J, Perona P, Ramanan D, Dollar P, Zitnick CL (2014) Microsoft coco: common objects in context. In: European conference on computer vision (ECCV), pp 740\u2013755","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"8976_CR144","doi-asserted-by":"crossref","unstructured":"Lin TY, Dollar P, Girshick R, He KM, Hariharan B, Belongie S (2017) Feature pyramid networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 936\u2013944","DOI":"10.1109\/CVPR.2017.106"},{"key":"8976_CR145","doi-asserted-by":"crossref","first-page":"60","DOI":"10.1016\/j.media.2017.07.005","volume":"42","author":"G Litjens","year":"2017","unstructured":"Litjens G, Kooi T, Bejnordi BE, Setio AAA, Ciompi F, Ghafoorian M, Van Der Laak JA, Van Ginneken B, Sanchez CI (2017) A survey on deep learning in medical image analysis. Med Image Anal 42:60\u201388","journal-title":"Med Image Anal"},{"key":"8976_CR146","doi-asserted-by":"crossref","unstructured":"Liu C, Zoph B, Neumann M, Shlens J, Hua W, Li L-J, Fei-Fei L, Yuille A, Huang J, Murphy K (2018) Progressive neural architecture search. In: European conference on computer vision (ECCV), pp 19\u201334","DOI":"10.1007\/978-3-030-01246-5_2"},{"key":"8976_CR147","doi-asserted-by":"crossref","unstructured":"Liu L, Ouyang W, Wang X, Fieguth P, Chen J, Liu X, Pietik\u00e4inen M (2018) Deep learning for generic object detection: a survey. International Journal of Computer Vision","DOI":"10.1007\/s11263-019-01247-4"},{"key":"8976_CR148","doi-asserted-by":"crossref","unstructured":"Liu S, Huang D, Wang Y (2018) Receptive field block net for accurate and fast object detection. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)","DOI":"10.1007\/978-3-030-01252-6_24"},{"issue":"2","key":"8976_CR149","doi-asserted-by":"crossref","first-page":"353","DOI":"10.1109\/TPAMI.2010.70","volume":"33","author":"T Liu","year":"2010","unstructured":"Liu T, Yuan Z, Sun J, Wang J, Zheng N, Tang X, Shum HY (2010) Learning to detect a salient object. IEEE Transactions on Pattern analysis and Machine Intelligence 33(2):353\u2013367","journal-title":"IEEE Transactions on Pattern analysis and Machine Intelligence"},{"key":"8976_CR150","doi-asserted-by":"crossref","unstructured":"Liu W, Anguelov D, Erhan D, Szegedy C, Reed S, Fu C-Y, Berg AC (2016) SSD: single shot multibox detector. In: European conference on computer vision (ECCV), pp 21\u201337","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"8976_CR151","doi-asserted-by":"crossref","unstructured":"Liu Y, Wang R, Shan S, Chen X (2018) Structure inference net: object detection using scene-level context and instance-level relationships. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6985\u20136994","DOI":"10.1109\/CVPR.2018.00730"},{"key":"8976_CR152","doi-asserted-by":"crossref","unstructured":"Long J, Shelhamer E, Darrell T (2015) Fully convolutional networks for semantic segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3431\u20133440","DOI":"10.1109\/CVPR.2015.7298965"},{"issue":"5","key":"8976_CR153","doi-asserted-by":"crossref","first-page":"2486","DOI":"10.1109\/TGRS.2016.2645610","volume":"55","author":"Y Long","year":"2017","unstructured":"Long Y, Gong Y, Xiao Z, Liu Q (2017) Accurate object localization in remote sensing images based on convolutional neural networks. 
IEEE Trans Geosci Remote Sens 55(5):2486\u20132498","journal-title":"IEEE Trans Geosci Remote Sens"},{"key":"8976_CR154","unstructured":"Lotter W, Kreiman G, Cox D (2017) Deep predictive coding networks for video prediction and unsupervised learning. International conference on learning representations (ICLR)"},{"issue":"2","key":"8976_CR155","doi-asserted-by":"crossref","first-page":"91","DOI":"10.1023\/B:VISI.0000029664.99615.94","volume":"60","author":"DG Lowe","year":"2004","unstructured":"Lowe DG (2004) Distinctive image features from scale-invariant keypoints. Int J Comput Vis 60(2):91\u2013110","journal-title":"Int J Comput Vis"},{"key":"8976_CR156","unstructured":"Luo JH, Wu J (2017) An entropy-based pruning method for cnn compression. arXiv:1706.05791"},{"key":"8976_CR157","doi-asserted-by":"crossref","unstructured":"Ma N, Zhang X, Zheng H-T, Sun J (2018) ShuffleNet v2: practical guidelines for efficient cnn architecture design. In: European conference on computer vision (ECCV), pp 116\u2013131","DOI":"10.1007\/978-3-030-01264-9_8"},{"issue":"3","key":"8976_CR158","doi-asserted-by":"crossref","first-page":"417","DOI":"10.1109\/TETC.2016.2593643","volume":"6","author":"H Mao","year":"2018","unstructured":"Mao H, Yao S, Tang T, Li B, Yao J, Wang Y (2018) Towards real-time object detection on embedded systems. IEEE Transactions on Emerging Topics in Computing 6(3):417\u2013431","journal-title":"IEEE Transactions on Emerging Topics in Computing"},{"key":"8976_CR159","doi-asserted-by":"crossref","unstructured":"Mao J, Xiao T, Jiang Y, Cao Z (2017) What can help pedestrian detection?. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3127\u20133136","DOI":"10.1109\/CVPR.2017.639"},{"key":"8976_CR160","unstructured":"Mate K, Zbigniew W, Jakub M, Jacek N, Kyunghyun C (2019) Augmentation for small object detection. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)"},{"issue":"4","key":"8976_CR161","doi-asserted-by":"crossref","first-page":"235","DOI":"10.1093\/ijl\/3.4.235","volume":"3","author":"GA Miller","year":"1990","unstructured":"Miller GA, Beckwith R, Fellbaum C, Gross D, Miller KJ (1990) Introduction to WordNet: an on-line lexical database. Int J Lexicogr 3(4):235\u2013244","journal-title":"Int J Lexicogr"},{"key":"8976_CR162","unstructured":"Moore R, DeNero J (2013) L1 and L2 regularization for multiclass hinge loss models. Symposium on Machine Learning in Speech and Language Processing"},{"key":"8976_CR163","unstructured":"Nair V, Hinton GE (2010) Rectified linear units improve restricted boltzmann machines. In: Proceedings of the international conference on machine learning (ICML), pp 807\u2013814"},{"key":"8976_CR164","unstructured":"Najafi H, Genc Y (2010) Fast object detection for augmented reality systems. US Patent Application"},{"key":"8976_CR165","doi-asserted-by":"crossref","unstructured":"Najibi M, Samangouei P, Chellappa R, Davis LS (2017) Ssh: single stage headless face detector. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 4875\u20134884","DOI":"10.1109\/ICCV.2017.522"},{"key":"8976_CR166","doi-asserted-by":"crossref","unstructured":"Neubeck A, Van Gool L (2006) Efficient non-maximum suppression. In: International conference on pattern recognition (ICPR), pp 850\u2013855","DOI":"10.1109\/ICPR.2006.479"},{"key":"8976_CR167","unstructured":"Ni K, Pearce R, Boakye K, Van Essen B, Borth D, Chen B, Wang E (2015) Large-scale deep learning on the YFCC100M dataset. arXiv:1502.03409"},{"key":"8976_CR168","doi-asserted-by":"crossref","unstructured":"Oquab M, Bottou L, Laptev I, Sivic J (2014) Learning and transferring mid-level image representations using convolutional neural networks. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1717\u20131724","DOI":"10.1109\/CVPR.2014.222"},{"key":"8976_CR169","unstructured":"Ouyang W, Wang K, Zhu X, Wang X (2017) Learning chained deep features and classifiers for cascade in object detection. In: Proceedings of the IEEE international conference on computer vision (ICCV)"},{"key":"8976_CR170","doi-asserted-by":"crossref","unstructured":"Ouyang W, Wang X (2013) Joint deep learning for pedestrian detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2056\u20132063","DOI":"10.1109\/ICCV.2013.257"},{"key":"8976_CR171","doi-asserted-by":"crossref","unstructured":"Ouyang W, Wang X, Zeng X, Qiu S, Luo P, Tian Y, Li H, Yang S, Wang Z, Loy C-C (2015) Deepid-net: deformable deep convolutional neural networks for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2403\u20132412","DOI":"10.1109\/CVPR.2015.7298854"},{"key":"8976_CR172","doi-asserted-by":"crossref","unstructured":"Pang J, Chen K, Shi J, Feng H, Ouyang W, Lin D (2019) Libra r-cnn: towards balanced learning for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 821\u2013830","DOI":"10.1109\/CVPR.2019.00091"},{"key":"8976_CR173","doi-asserted-by":"crossref","unstructured":"Pang J, Li C, Shi J, Xu Z, Feng H (2019) R2-CNN: fast tiny object detection in large-scale remote sensing images. IEEE Transactions on Geoscience and Remote Sensing","DOI":"10.1109\/TGRS.2019.2899955"},{"key":"8976_CR174","doi-asserted-by":"crossref","unstructured":"Peng C, Xiao T, Li Z, Jiang Y, Zhang X, Jia K, Yu G, Sun J (2018) MegDet: a large mini-batch object detector. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6181\u20136189","DOI":"10.1109\/CVPR.2018.00647"},{"key":"8976_CR175","doi-asserted-by":"crossref","unstructured":"Peng J, Sun M, Zhang Z, Tan T, Yan J (2019) POD: practical object detection with scale-sensitive network. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 9607\u20139616","DOI":"10.1109\/ICCV.2019.00970"},{"key":"8976_CR176","unstructured":"Peng J, Sun M, ZHANG Z X, Tan T, Yan J (2019) Efficient neural architecture transformation search in channel-level for object detection. In: Advances in neural information processing systems (NeurIPS), pp 14290\u201314299"},{"key":"8976_CR177","doi-asserted-by":"crossref","unstructured":"Pentina A, Sharmanska V, Lampert CH (2015) Curriculum learning of multiple tasks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 5492\u20135500","DOI":"10.1109\/CVPR.2015.7299188"},{"key":"8976_CR178","unstructured":"Pinheiro PO, Collobert R, Dollar P (2015) Learning to segment object candidates. In: Advances in neural information processing systems (NIPS), pp 1990\u20131998"},{"key":"8976_CR179","unstructured":"PyTorch (2020) Tensors and dynamic neural networks in python with strong GPU acceleration https:\/\/pytorch.org\/. Software available from pytorch.org"},{"key":"8976_CR180","doi-asserted-by":"crossref","unstructured":"Qiang Z, Mei-Chen Y, Kwang-Ting C, Shai A (2006) Fast human detection using a cascade of histograms of oriented gradients. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1491\u20131498","DOI":"10.1109\/CVPR.2006.119"},{"key":"8976_CR181","doi-asserted-by":"crossref","unstructured":"Qiu J, Wang J, Yao S, Guo K, Li B, Zhou E, Yu J, Tang T, Xu N, Song S (2016) Going deeper with embedded fpga platform for convolutional neural network. 
In: Proceedings of the ACM\/SIGDA international symposium on field-programmable gate arrays, pp 26\u201335","DOI":"10.1145\/2847263.2847265"},{"key":"8976_CR182","doi-asserted-by":"crossref","unstructured":"Rabinovich A, Vedaldi A, Galleguillos C, Wiewiora E, Belongie S (2007) Objects in context. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 1\u20138","DOI":"10.1109\/ICCV.2007.4408986"},{"key":"8976_CR183","unstructured":"Radford A, Metz L, Chintala S (2016) Unsupervised representation learning with deep convolutional generative adversarial networks. In: International conference on learning representations (ICLR)"},{"key":"8976_CR184","doi-asserted-by":"crossref","unstructured":"Rahman S, Khan S, Porikli F (2018) Zero-shot object detection: learning to simultaneously recognize and localize novel concepts. In: European conference on computer vision (ECCV)","DOI":"10.1007\/978-3-030-20887-5_34"},{"key":"8976_CR185","doi-asserted-by":"crossref","unstructured":"Rastegari M, Ordonez V, Redmon J, Farhadi A (2016) Xnor-net: imagenet classification using binary convolutional neural networks. In: European conference on computer vision (ECCV), pp 525\u2013542","DOI":"10.1007\/978-3-319-46493-0_32"},{"key":"8976_CR186","unstructured":"Redmon J (2013) Darknet: open source neural networks in c. http:\/\/pjreddie.com\/darknet"},{"key":"8976_CR187","doi-asserted-by":"crossref","unstructured":"Redmon J, Divvala S, Girshick R, Farhadi A (2016) You only look once: unified, real-time object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 779\u2013788","DOI":"10.1109\/CVPR.2016.91"},{"key":"8976_CR188","doi-asserted-by":"crossref","unstructured":"Redmon J, Farhadi A (2017) YOLO9000: better, faster, stronger. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6517\u20136525","DOI":"10.1109\/CVPR.2017.690"},{"key":"8976_CR189","unstructured":"Redmon J, Farhadi A (2018) YOLOv3: an incremental improvement. arXiv:180402767"},{"issue":"7","key":"8976_CR190","doi-asserted-by":"crossref","first-page":"1476","DOI":"10.1109\/TPAMI.2016.2601099","volume":"39","author":"S Ren","year":"2016","unstructured":"Ren S, He K, Girshick R, Zhang X, Sun J (2016) Object detection networks on convolutional feature maps. IEEE transactions on pattern analysis and machine intelligence 39(7):1476\u20131481","journal-title":"IEEE transactions on pattern analysis and machine intelligence"},{"key":"8976_CR191","unstructured":"Ren SQ, He KM, Girshick R, Sun J (2015) Faster R-CNN: towards real-time object detection with region proposal networks. In: Advances in neural information processing systems (NIPS), pp 91\u201399"},{"key":"8976_CR192","doi-asserted-by":"crossref","unstructured":"Ren X, Ramanan D (2013) Histograms of sparse codes for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3246\u20133253","DOI":"10.1109\/CVPR.2013.417"},{"issue":"5","key":"8976_CR193","doi-asserted-by":"crossref","first-page":"813","DOI":"10.3390\/app8050813","volume":"8","author":"Y Ren","year":"2018","unstructured":"Ren Y, Zhu C, Xiao S (2018) Small object detection in optical remote sensing images via modified faster R-CNN. Appl Sci 8(5):813","journal-title":"Appl Sci"},{"issue":"6088","key":"8976_CR194","doi-asserted-by":"crossref","first-page":"533","DOI":"10.1038\/323533a0","volume":"323","author":"DE Rumelhart","year":"1986","unstructured":"Rumelhart DE, Hinton GE, Williams RJ (1986) Learning representations by back-propagating errors. 
Nature 323(6088):533\u2013536","journal-title":"Nature"},{"issue":"3","key":"8976_CR195","doi-asserted-by":"crossref","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky O, Deng J, Su H, Krause J, Satheesh S, Ma S, Huang Z, Karpathy A, Khosla A, Bernstein M (2015) Imagenet large scale visual recognition challenge. Int J Comput Vis 115(3):211\u2013252","journal-title":"Int J Comput Vis"},{"key":"8976_CR196","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen L-C (2018) Mobilenetv2: inverted residuals and linear bottlenecks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4510\u20134520","DOI":"10.1109\/CVPR.2018.00474"},{"key":"8976_CR197","unstructured":"Sermanet P, Eigen D, Zhang X, Mathieu M, Fergus R, LeCun Y (2014) Overfeat: integrated recognition localization and detection using convolutional networks. In: The international conference on learning representations (ICLR)"},{"key":"8976_CR198","doi-asserted-by":"crossref","unstructured":"Shelhamer E, Rakelly K, Hoffman J, Darrell T (2016) Clockwork convnets for video semantic segmentation. In: European conference on computer vision (ECCV), pp 852\u2013868","DOI":"10.1007\/978-3-319-49409-8_69"},{"key":"8976_CR199","doi-asserted-by":"crossref","unstructured":"Shen Z, Liu Z, Li J, Jiang Y-G, Chen Y, Xue X (2017) DSOD: learning deeply supervised object detectors from scratch. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 1937\u20131945","DOI":"10.1109\/ICCV.2017.212"},{"key":"8976_CR200","doi-asserted-by":"crossref","unstructured":"Shigeto Y, Suzuki I, Hara K, Shimbo M, Matsumoto Y (2015) Ridge regression, hubness, and zero-shot learning. 
In: Joint European conference on machine learning and knowledge discovery in databases (ECML PKDD), pp 135\u2013151","DOI":"10.1007\/978-3-319-23528-8_9"},{"key":"8976_CR201","doi-asserted-by":"crossref","unstructured":"Shmelkov K, Schmid C, Alahari K (2017) Incremental learning of object detectors without catastrophic forgetting. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 3420\u20133429","DOI":"10.1109\/ICCV.2017.368"},{"key":"8976_CR202","doi-asserted-by":"crossref","unstructured":"Shrivastava A, Gupta A, Girshick R (2016) Training region-based object detectors with online hard example mining. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 761\u2013769","DOI":"10.1109\/CVPR.2016.89"},{"key":"8976_CR203","unstructured":"Shrivastava A, Sukthankar R, Malik J, Gupta A (2017) Beyond skip connections: top-down modulation for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)"},{"key":"8976_CR204","doi-asserted-by":"crossref","unstructured":"Singh B, Davis LS (2018) An analysis of scale invariance in object detection SNIP. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3578\u20133587","DOI":"10.1109\/CVPR.2018.00377"},{"key":"8976_CR205","doi-asserted-by":"crossref","unstructured":"Simon M, Milz S, Amende K, Gross H-M (2018) Complex-YOLO: an euler-region-proposal for real-time 3d object detection on point clouds. In: European Conference on Computer Vision Workshops","DOI":"10.1007\/978-3-030-11009-3_11"},{"key":"8976_CR206","unstructured":"Simon M, Rodner E, Denzler J (2016) Imagenet pre-trained models with batch normalization. arXiv:161201452"},{"key":"8976_CR207","unstructured":"Simonyan K, Zisserman A (2015) Very deep convolutional networks for large-scale image recognition. 
In: International conference on learning representations (ICLR)"},{"key":"8976_CR208","doi-asserted-by":"crossref","unstructured":"Sivic J, Zisserman A (2003) Video Google: a text retrieval approach to object matching in videos. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 1470\u20131477","DOI":"10.1109\/ICCV.2003.1238663"},{"issue":"1","key":"8976_CR209","first-page":"1929","volume":"15","author":"N Srivastava","year":"2014","unstructured":"Srivastava N, Hinton G, Krizhevsky A, Sutskever I, Salakhutdinov R (2014) Dropout: a simple way to prevent neural networks from overfitting. The Journal of Machine Learning Research 15(1):1929\u20131958","journal-title":"The Journal of Machine Learning Research"},{"key":"8976_CR210","doi-asserted-by":"crossref","unstructured":"Sturm J, Engelhard N, Endres F, Burgard W, Cremers D (2012) A benchmark for the evaluation of RGB-D SLAM systems. In: IEEE\/RSJ international conference on intelligent robots and systems (IROS), pp 573\u2013580","DOI":"10.1109\/IROS.2012.6385773"},{"key":"8976_CR211","doi-asserted-by":"crossref","unstructured":"Sun B, Saenko K (2014) From virtual to reality: fast adaptation of virtual object detectors to real domains. In: British machine vision conference (BMVC)","DOI":"10.5244\/C.28.82"},{"key":"8976_CR212","doi-asserted-by":"crossref","unstructured":"Szegedy C, Ioffe S, Vanhoucke V, Alemi A (2017) Inception-v4 inception-resnet and the impact of residual connections on learning. In: AAAI conference on artificial intelligence","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"8976_CR213","doi-asserted-by":"crossref","unstructured":"Szegedy C, Liu W, Jia YQ, Sermanet P, Reed S, Anguelov D, Erhan D, Vanhoucke V, Rabinovich A (2015) Going deeper with convolutions. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1\u20139","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"8976_CR214","doi-asserted-by":"crossref","unstructured":"Szegedy C, Vanhoucke V, Ioffe S, Shlens J, Wojna Z (2016) Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2818\u20132826","DOI":"10.1109\/CVPR.2016.308"},{"key":"8976_CR215","doi-asserted-by":"crossref","unstructured":"Taigman Y, Yang M, Ranzato MA, Wolf L (2014) Deepface: closing the gap to human-level performance in face verification. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1701\u20131708","DOI":"10.1109\/CVPR.2014.220"},{"key":"8976_CR216","doi-asserted-by":"crossref","unstructured":"Tan M, Chen B, Pang R, Vasudevan V, Le QV (2018) MnasNet: platform-aware neural architecture search for mobile. arXiv:180711626","DOI":"10.1109\/CVPR.2019.00293"},{"key":"8976_CR217","unstructured":"Tanner G (2020) Object detection API System. https:\/\/github.com\/tensorflow\/models"},{"key":"8976_CR218","doi-asserted-by":"crossref","unstructured":"Teichmann M, Weber M, Zoellner M, Cipolla R, Urtasun R (2018) Multinet: real-time joint semantic reasoning for autonomous driving. In: IEEE intelligent vehicles symposium (IV), pp 1013\u20131020","DOI":"10.1109\/IVS.2018.8500504"},{"key":"8976_CR219","unstructured":"TensorFlow (2020) Large-Scale machine learning on heterogeneous distributed systems. https:\/\/www.tensorflow.org\/. Software available from tensorflow.org"},{"key":"8976_CR220","doi-asserted-by":"crossref","unstructured":"Tian Y, Luo P, Wang X, Tang X (2015) Deep learning strong parts for pedestrian detection. 
In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 1904\u20131912","DOI":"10.1109\/ICCV.2015.221"},{"key":"8976_CR221","doi-asserted-by":"crossref","unstructured":"Tian Z, Shen C, Chen H, He T (2019) FCOS: fully convolutional one-stage object detection. In: Proceedings of the IEEE international conference on computer vision (ICCV)","DOI":"10.1109\/ICCV.2019.00972"},{"issue":"11","key":"8976_CR222","doi-asserted-by":"crossref","first-page":"1958","DOI":"10.1109\/TPAMI.2008.128","volume":"30","author":"A Torralba","year":"2008","unstructured":"Torralba A, Fergus R, Freeman WT (2008) 80 million tiny images: a large data set for nonparametric object and scene recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence 30(11):1958\u20131970","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"8976_CR223","doi-asserted-by":"crossref","unstructured":"Tychsen-Smith L, Petersson L (2017) Denet: scalable real-time object detection with directed sparse sampling. Objects in context. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 428\u2013436","DOI":"10.1109\/ICCV.2017.54"},{"issue":"2","key":"8976_CR224","doi-asserted-by":"crossref","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JRR Uijlings","year":"2013","unstructured":"Uijlings JRR, van de Sande KEA, Gevers T, Smeulders AWM (2013) Selective search for object recognition. International Journal of Computer Vision (IJCV) 104 (2):154\u2013171","journal-title":"International Journal of Computer Vision (IJCV)"},{"key":"8976_CR225","doi-asserted-by":"crossref","unstructured":"van de Sande KEA, Uijlings JRR, Gevers T, Smeulders AWM (2011) Segmentation as selective search for object recognition. 
In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 1879\u20131886","DOI":"10.1109\/ICCV.2011.6126456"},{"key":"8976_CR226","unstructured":"Van Etten A (2018) You only look twice: rapid multi-scale object detection in satellite imagery. arXiv:1805.09512"},{"key":"8976_CR227","doi-asserted-by":"crossref","unstructured":"Van Horn G, Mac Aodha O, Song Y, Cui Y, Sun C, Shepard A, Adam H, Perona P, Belongie S (2018) The inaturalist species classification and detection dataset. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 8769\u20138778","DOI":"10.1109\/CVPR.2018.00914"},{"key":"8976_CR228","doi-asserted-by":"crossref","unstructured":"Vedaldi A, Gulshan V, Varma M, Zisserman A (2009) Multiple kernels for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 606\u2013613","DOI":"10.1109\/ICCV.2009.5459183"},{"key":"8976_CR229","doi-asserted-by":"crossref","unstructured":"Viola P, Jones M (2001) Rapid object detection using a boosted cascade of simple features. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1\u20131","DOI":"10.1109\/CVPR.2001.990517"},{"issue":"2","key":"8976_CR230","doi-asserted-by":"crossref","first-page":"137","DOI":"10.1023\/B:VISI.0000013087.49260.fb","volume":"57","author":"P Viola","year":"2004","unstructured":"Viola P, Jones MJ (2004) Robust real-time face detection. International journal of computer vision 57(2):137\u2013154","journal-title":"International journal of computer vision"},{"key":"8976_CR231","unstructured":"Wagstaff K, Cardie C, Rogers S, Schr?dl S (2001) Constrained k-means clustering with background knowledge. 
In: International conference on machine learning (ICML), pp 577\u2013584"},{"issue":"2","key":"8976_CR232","doi-asserted-by":"crossref","first-page":"310","DOI":"10.1109\/LGRS.2018.2872355","volume":"16","author":"C Wang","year":"2018","unstructured":"Wang C, Bai X, Wang S, Zhou J, Ren P (2018) Multiscale visual attention networks for object detection in VHR remote sensing images. IEEE Geosci Remote Sens Lett 16(2):310\u2013314","journal-title":"IEEE Geosci Remote Sens Lett"},{"key":"8976_CR233","doi-asserted-by":"crossref","unstructured":"Wang H, Wang Q, Gao M, Li P, Zuo W (2018) Multi-scale location-aware kernel representation for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1248\u20131257","DOI":"10.1109\/CVPR.2018.00136"},{"issue":"6","key":"8976_CR234","doi-asserted-by":"crossref","first-page":"620","DOI":"10.3390\/rs11060620","volume":"11","author":"J Wang","year":"2019","unstructured":"Wang J, Zheng T, Lei P, Bai X (2019) A hierarchical convolution neural network (CNN)-based ship target detection method in spaceborne SAR imagery. Remote Sens 11(6):620","journal-title":"Remote Sens"},{"key":"8976_CR235","doi-asserted-by":"crossref","unstructured":"Wang L, Lu H, Ruan X, Yang MH (2015) Deep networks for saliency detection via local estimation and global search. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3183\u20133192","DOI":"10.1109\/CVPR.2015.7298938"},{"key":"8976_CR236","doi-asserted-by":"crossref","unstructured":"Wang L, Wang L, Lu H, Zhang P, Ruan X (2016) Saliency detection with recurrent fully convolutional networks. In: European conference on computer vision (ECCV), pp 825\u2013841","DOI":"10.1007\/978-3-319-46493-0_50"},{"key":"8976_CR237","unstructured":"Wang RJ, Li X, Ao S, Ling CX (2018) Pelee: a real-time object detection system on mobile devices. 
In: Advances in neural information processing systems (NIPS)"},{"key":"8976_CR238","doi-asserted-by":"crossref","unstructured":"Wang S, Zhou Y, Yan J, Deng Z (2018) Fully motion-aware network for video object detection. In: European conference on computer vision (ECCV), pp 542\u2013557","DOI":"10.1007\/978-3-030-01261-8_33"},{"key":"8976_CR239","unstructured":"Wang W, Lai Q, Fu H, Shen J, Ling H (2019) Salient object detection in the deep learning era: an in-depth survey. arXiv:1904.09146"},{"key":"8976_CR240","doi-asserted-by":"crossref","unstructured":"Wang WH, Yang J, Xiao JW, Li S, Zhou DX (2015) Face recognition based on deep learning. In: International conference on human-centered computing (HCC), pp 812\u2013820","DOI":"10.1007\/978-3-319-15554-8_73"},{"key":"8976_CR241","doi-asserted-by":"crossref","unstructured":"Wang X, Han T, Yan S (2009) An HOG-LBP human detector with partial occlusion handling. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 32\u201339","DOI":"10.1109\/ICCV.2009.5459207"},{"key":"8976_CR242","doi-asserted-by":"crossref","unstructured":"Wang X, Xiao T, Jiang Y, Shao S, Sun J, Shen C (2018) Repulsion loss: detecting pedestrians in a crowd. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 7774\u20137783","DOI":"10.1109\/CVPR.2018.00811"},{"key":"8976_CR243","doi-asserted-by":"crossref","unstructured":"Wang XL, Shrivastava A, Gupta A (2017) A-Fast-RCNN: hard positive generation via adversary for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 3039\u20133048","DOI":"10.1109\/CVPR.2017.324"},{"key":"8976_CR244","doi-asserted-by":"crossref","first-page":"216","DOI":"10.1016\/j.patcog.2016.04.002","volume":"58","author":"Y Wei","year":"2016","unstructured":"Wei Y, You X, Li H (2016) Multiscale patch-based contrast measure for small infrared target detection. 
Pattern Recogn 58:216\u2013226","journal-title":"Pattern Recogn"},{"issue":"1","key":"8976_CR245","doi-asserted-by":"crossref","first-page":"417","DOI":"10.1016\/j.cirp.2016.04.072","volume":"65","author":"D Weimer","year":"2016","unstructured":"Weimer D, Scholz-Reiter B, Shpitalni M (2016) Design of deep convolutional neural network architectures for automated feature extraction in industrial inspection. CIRP Annals-Manufacturing Technology 65(1):417\u2013420","journal-title":"CIRP Annals-Manufacturing Technology"},{"key":"8976_CR246","unstructured":"Weston J, Watkins C (1999) Support vector machines for multi-class pattern recognition. In: European symposium on artificial neural networks (ESANN), pp 219\u2013224"},{"issue":"1","key":"8976_CR247","doi-asserted-by":"crossref","first-page":"79","DOI":"10.3354\/cr030079","volume":"30","author":"CJ Willmott","year":"2005","unstructured":"Willmott CJ, Matsuura K (2005) Advantages of the mean absolute error (MAE) over the root mean square error (RMSE) in assessing average model performance. Climate Research 30(1):79\u201382","journal-title":"Climate Research"},{"key":"8976_CR248","doi-asserted-by":"crossref","unstructured":"Wu J, Leng C, Wang Y, Hu Q, Cheng J (2016) Quantized convolutional neural networks for mobile devices. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4820\u20134828","DOI":"10.1109\/CVPR.2016.521"},{"key":"8976_CR249","unstructured":"Wu X, Wu Y, Zhao Y (2016) Binarized neural networks on the imagenet classification task. arXiv:1604.03058"},{"key":"8976_CR250","unstructured":"Xiangyu Z, Xinyu Z, Mengxiao L, Jian S (2017) ShuffleNet: an extremely efficient convolutional neural network for mobile devices. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 6848\u20136856"},{"issue":"1","key":"8976_CR251","doi-asserted-by":"crossref","first-page":"3","DOI":"10.1007\/s11263-014-0748-y","volume":"119","author":"J Xiao","year":"2016","unstructured":"Xiao J, Ehinger KA, Hays J, Torralba A, Oliva A (2016) Sun database: exploring a large collection of scene categories. Int J Comput Vis 119(1):3\u201322","journal-title":"Int J Comput Vis"},{"key":"8976_CR252","doi-asserted-by":"crossref","unstructured":"Xie S, Girshick R, Dollar P, Tu Z, He K (2017) Aggregated residual transformations for deep neural networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 5987\u20135995","DOI":"10.1109\/CVPR.2017.634"},{"key":"8976_CR253","unstructured":"Xu M, Cui L, Lv P, Jiang X, Niu J, Zhou B, Wang M (2018) Mdssd: Multi-scale deconvolutional single shot detector for small objects. arXiv:1805.07009"},{"key":"8976_CR254","doi-asserted-by":"crossref","unstructured":"Xue J, Li JY, Gong YF (2013) Restructuring of deep neural network acoustic models with singular value decomposition. In: Annual conference of the international speech communication association, pp 2364\u20132368","DOI":"10.21437\/Interspeech.2013-552"},{"key":"8976_CR255","doi-asserted-by":"crossref","unstructured":"Yanai K, Kawano Y (2015) Food image recognition using deep convolutional network with pre-training and fine-tuning. In: IEEE international conference on multimedia and expo workshops (ICMEW), pp 1\u20136","DOI":"10.1109\/ICMEW.2015.7169816"},{"key":"8976_CR256","unstructured":"Yang B, Yan J, Lei Z, Li SZ (2014) Aggregate channel features for multi-view face detection. 
In: IEEE international joint conference on biometrics (IJCB), pp 1\u20138"},{"issue":"8","key":"8976_CR257","doi-asserted-by":"crossref","first-page":"1845","DOI":"10.1109\/TPAMI.2017.2738644","volume":"40","author":"S Yang","year":"2017","unstructured":"Yang S, Luo P, Loy CC, Tang X (2017) Faceness-net: face detection through deep facial part responses. IEEE Transactions on Pattern Analysis and Machine Intelligence 40(8):1845\u20131859","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"8976_CR258","doi-asserted-by":"crossref","unstructured":"Yang TJ, Chen YH, Sze V (2017) Designing energy-efficient convolutional neural networks using energy-aware pruning. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 5687\u20135695","DOI":"10.1109\/CVPR.2017.643"},{"key":"8976_CR259","doi-asserted-by":"crossref","unstructured":"Yang Z, Liu S, Hu H, Wang L, Lin S (2019) Reppoints: point set representation for object detection. In: Proceedings of the IEEE international conference on computer vision (ICCV","DOI":"10.1109\/ICCV.2019.00975"},{"key":"8976_CR260","doi-asserted-by":"crossref","unstructured":"Yildirim G, Susstrunk S (2014) FASA: fast, accurate, and size-aware salient object detection. In: Asian conference on computer vision (ACCV), pp 514\u2013528","DOI":"10.1007\/978-3-319-16811-1_34"},{"key":"8976_CR261","unstructured":"Yu F, Koltun V (2016) Multi-scale context aggregation by dilated convolutions. In: International conference on learning representations (ICLR)"},{"key":"8976_CR262","doi-asserted-by":"crossref","unstructured":"Yu F, Koltun V, Funkhouser TA (2017) Dilated residual networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 636\u2013644","DOI":"10.1109\/CVPR.2017.75"},{"key":"8976_CR263","unstructured":"Yu Y, Zhang J, Huang Y, Zheng S, Ren W, Wang C, Huang K, Tan T (2010) Object detection by context and boosted HOG-LBP. 
In: European conference on computer vision workshop on PASCAL VOC"},{"issue":"4","key":"8976_CR264","doi-asserted-by":"crossref","first-page":"766","DOI":"10.1109\/TMM.2007.893351","volume":"9","author":"Z Yu","year":"2007","unstructured":"Yu Z, Wong HS (2007) A rule based technique for extraction of visual attention regions based on real-time clustering. IEEE Transactions on Multimedia 9(4):766\u2013784","journal-title":"IEEE Transactions on Multimedia"},{"key":"8976_CR265","doi-asserted-by":"crossref","unstructured":"Zagoruyko S, Lerer A, Lin TY, Pinheiro PO, Gross S, Chintala S, Dollar P (2016) A multipath network for object detection. arXiv:1604.02135","DOI":"10.5244\/C.30.15"},{"key":"8976_CR266","doi-asserted-by":"crossref","unstructured":"Zeiler MD, Fergus R (2014) Visualizing and understanding convolutional networks. In: European conference on computer vision (ECCV), pp 818\u2013833","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"8976_CR267","doi-asserted-by":"crossref","unstructured":"Zeiler MD, Krishnan D, Taylor GW, Fergus R (2010) Deconvolutional networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2528\u20132535","DOI":"10.1109\/CVPR.2010.5539957"},{"key":"8976_CR268","doi-asserted-by":"crossref","unstructured":"Zeiler MD, Taylor GW, Fergus R (2011) Adaptive deconvolutional networks for mid and high level feature learning. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 2018\u20132025","DOI":"10.1109\/ICCV.2011.6126474"},{"key":"8976_CR269","doi-asserted-by":"crossref","unstructured":"Zeng X, Ouyang W, Yang B, Yan J, Wang X (2016) Gated bi-directional cnn for object detection. In: European conference on computer vision (ECCV), pp 354\u2013369","DOI":"10.1007\/978-3-319-46478-7_22"},{"key":"8976_CR270","unstructured":"Zhang H, Chang H, Ma B, Shan S, Chen X (2019) Cascade RetinaNet: maintaining consistency for single-stage object detection. 
In: The British machine vision conference (BMVC)"},{"key":"8976_CR271","doi-asserted-by":"crossref","unstructured":"Zhang J, Sclaroff S, Lin Z, Shen X, Price B, Mech R (2016) Unconstrained salient object detection via proposal subset optimization. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 5733\u20135742","DOI":"10.1109\/CVPR.2016.618"},{"key":"8976_CR272","doi-asserted-by":"crossref","unstructured":"Zhang J, Zhang T, Dai Y, Harandi M, Hartley R (2018) Deep unsupervised saliency detection: a multiple noisy labeling perspective. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 9029\u20139038","DOI":"10.1109\/CVPR.2018.00941"},{"issue":"10","key":"8976_CR273","doi-asserted-by":"crossref","first-page":"1499","DOI":"10.1109\/LSP.2016.2603342","volume":"23","author":"K Zhang","year":"2016","unstructured":"Zhang K, Zhang Z, Li Z, Qiao Y (2016) Joint face detection and alignment using multitask cascaded convolutional networks. IEEE Signal Processing Letters 23 (10):1499\u20131503","journal-title":"IEEE Signal Processing Letters"},{"key":"8976_CR274","doi-asserted-by":"crossref","unstructured":"Zhang L, Lin L, Liang X, He K (2016) Is faster r-cnn doing well for pedestrian detection?. In: European conference on computer vision (ECCV), pp 443\u2013457","DOI":"10.1007\/978-3-319-46475-6_28"},{"key":"8976_CR275","doi-asserted-by":"crossref","unstructured":"Zhang S, Benenson R, Schiele B (2017) Citypersons: a diverse dataset for pedestrian detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4457\u20134465","DOI":"10.1109\/CVPR.2017.474"},{"key":"8976_CR276","doi-asserted-by":"crossref","unstructured":"Zhang S, Wen L, Bian X, Lei Z, Li SZ (2018) Occlusion-aware R-CNN: detecting pedestrians in a crowd. 
In: European conference on computer vision (ECCV), pp 637\u2013653","DOI":"10.1007\/978-3-030-01219-9_39"},{"key":"8976_CR277","doi-asserted-by":"crossref","unstructured":"Zhang S, Wen L, Bian X, Lei Z, Li SZ (2018) Single-shot refinement neural network for object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4203\u20134212","DOI":"10.1109\/CVPR.2018.00442"},{"key":"8976_CR278","doi-asserted-by":"crossref","unstructured":"Zhang S, Zhu X, Lei Z, Shi H, Wang X, Li SZ (2017) Faceboxes: a CPU real-time face detector with high accuracy. In: IEEE international joint conference on biometrics (IJCB), pp 1\u20139","DOI":"10.1109\/BTAS.2017.8272675"},{"key":"8976_CR279","doi-asserted-by":"crossref","unstructured":"Zhang S, Zhu X, Lei Z, Shi H, Wang X, Li SZ (2017) S3fd: single shot scale-invariant face detector. In: Proceedings of the IEEE international conference on computer vision (CVPR), pp 192\u2013201","DOI":"10.1109\/ICCV.2017.30"},{"key":"8976_CR280","unstructured":"Zhang X, Wan F, Liu C, Ji R, Ye Q (2019) Freeanchor: learning to match anchors for visual object detection. In: Advances in neural information processing systems (NeurIPS), pp 147\u2013155"},{"key":"8976_CR281","doi-asserted-by":"crossref","unstructured":"Zhang Z, Qiao S, Xie C, Shen W, Wang B, Yuille AL (2018) Single-shot object detection with enriched semantics. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 5813\u20135821","DOI":"10.1109\/CVPR.2018.00609"},{"key":"8976_CR282","doi-asserted-by":"crossref","unstructured":"Zhang Z, Zhang C, Shen W, Yao C, Liu W, Bai X (2016) Multi-oriented text detection with fully convolutional networks. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4159\u20134167","DOI":"10.1109\/CVPR.2016.451"},{"key":"8976_CR283","doi-asserted-by":"crossref","unstructured":"Zhao Q, Sheng T, Wang Y, Tang Z, Chen Y, Cai L, Ling H (2019) M2det: a single-shot object detector based on multi-level feature pyramid network. In: Proceedings of the AAAI conference on artificial intelligence (AAAI), pp 9259\u20139266","DOI":"10.1609\/aaai.v33i01.33019259"},{"key":"8976_CR284","doi-asserted-by":"crossref","unstructured":"Zhao R, Ouyang W, Li H, Wang X (2015) Saliency detection by multi-context deep learning. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 1265\u20131274","DOI":"10.1109\/CVPR.2015.7298731"},{"key":"8976_CR285","unstructured":"Zhao Z-Q, Zheng P, Xu S-T, Wu X (2018) Object detection with deep learning: a review. IEEE Transactions on Neural Networks and Learning Systems, pp 1\u201321"},{"issue":"6","key":"8976_CR286","doi-asserted-by":"crossref","first-page":"1452","DOI":"10.1109\/TPAMI.2017.2723009","volume":"40","author":"B Zhou","year":"2018","unstructured":"Zhou B, Lapedriza A, Khosla A, Oliva A, Torralba A (2018) Places: a 10 million image database for scene recognition. IEEE Transactions on Pattern Analysis and Machine Intelligence 40(6):1452\u20131464","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"8976_CR287","doi-asserted-by":"crossref","unstructured":"Zhou P, Ni B, Geng C, Hu J, Xu Y (2018) Scale-transferrable object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 528\u2013537","DOI":"10.1109\/CVPR.2018.00062"},{"key":"8976_CR288","doi-asserted-by":"crossref","unstructured":"Zhou P, Ni BB, Geng C, Hu JG, Xu Y (2018) Scale-transferrable object detection. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 528\u2013537","DOI":"10.1109\/CVPR.2018.00062"},{"key":"8976_CR289","doi-asserted-by":"crossref","unstructured":"Zhou X, Zhuo J, Krahenbuhl P (2019) Bottom-up object detection by grouping extreme and center points. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 850\u2013859","DOI":"10.1109\/CVPR.2019.00094"},{"key":"8976_CR290","doi-asserted-by":"crossref","unstructured":"Zhu R, Zhang S, Wang X, Wen L, Shi H, Bo L, Mei T (2019) ScratchDet: training single-shot object detectors from scratch. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 2268\u20132277","DOI":"10.1109\/CVPR.2019.00237"},{"key":"8976_CR291","doi-asserted-by":"crossref","unstructured":"Zhu X, Dai J, Yuan L, Wei Y (2018) Towards high performance video object detection. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 7210\u20137218","DOI":"10.1109\/CVPR.2018.00753"},{"key":"8976_CR292","doi-asserted-by":"crossref","unstructured":"Zhu X, Wang Y, Dai J, Yuan L, Wei Y (2017) Flow-guided feature aggregation for video object detection. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 408\u2013417","DOI":"10.1109\/ICCV.2017.52"},{"key":"8976_CR293","doi-asserted-by":"crossref","unstructured":"Zhu X, Xiong Y, Dai J, Yuan L, Wei Y (2017) Deep feature flow for video recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4141\u20134150","DOI":"10.1109\/CVPR.2017.441"},{"key":"8976_CR294","unstructured":"Zhu Y, Urtasun R, Salakhutdinov R, Fidler S (2015) SegDeepM: exploiting segmentation and context in deep neural networks for object detection. 
In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 4703\u20134711"},{"key":"8976_CR295","doi-asserted-by":"crossref","unstructured":"Zhu Y, Zhao C, Wang J, Zhao X, Wu Y, Lu H (2017) Couplenet: coupling global structure with local parts for object detection. In: Proceedings of the IEEE international conference on computer vision (ICCV), pp 4126\u20134134","DOI":"10.1109\/ICCV.2017.444"},{"key":"8976_CR296","doi-asserted-by":"crossref","unstructured":"Zitnick CL, Dollar P (2014) Edge boxes: locating object proposals from edges. In: European conference on computer vision (ECCV), pp 391\u2013405","DOI":"10.1007\/978-3-319-10602-1_26"},{"key":"8976_CR297","doi-asserted-by":"crossref","unstructured":"Zoph B, Vasudevan V, Shlens J, Le QV (2018) Learning transferable architectures for scalable image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), pp 8697\u20138710","DOI":"10.1109\/CVPR.2018.00907"}],"container-title":["Multimedia Tools and 
Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-08976-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-020-08976-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-020-08976-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,7]],"date-time":"2024-08-07T14:21:32Z","timestamp":1723040492000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-020-08976-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2020,6,12]]},"references-count":297,"journal-issue":{"issue":"33-34","published-print":{"date-parts":[[2020,9]]}},"alternative-id":["8976"],"URL":"https:\/\/doi.org\/10.1007\/s11042-020-08976-6","relation":{},"ISSN":["1380-7501","1573-7721"],"issn-type":[{"value":"1380-7501","type":"print"},{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2020,6,12]]},"assertion":[{"value":"25 April 2019","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 February 2020","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"22 April 2020","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 June 2020","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}