{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,10]],"date-time":"2026-01-10T21:23:14Z","timestamp":1768080194169,"version":"3.49.0"},"reference-count":72,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2021,2,5]],"date-time":"2021-02-05T00:00:00Z","timestamp":1612483200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2021,2,5]],"date-time":"2021-02-05T00:00:00Z","timestamp":1612483200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"name":"Science and Technology Plan of Zhejiang Province of China","award":["No. 2017C01033"],"award-info":[{"award-number":["No. 2017C01033"]}]},{"name":"Civilian Fundamental Research","award":["No. D040301"],"award-info":[{"award-number":["No. D040301"]}]},{"name":"Collaborative Research grant from SenseTime Group","award":["No. TS1610626 & No. TS1712093"],"award-info":[{"award-number":["No. TS1610626 & No. TS1712093"]}]},{"name":"General Research Fund (GRF) of Hong Kong","award":["No. 14236516 & No. 14203518"],"award-info":[{"award-number":["No. 14236516 & No. 14203518"]}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2021,5]]},"DOI":"10.1007\/s11263-021-01434-2","type":"journal-article","created":{"date-parts":[[2021,2,5]],"date-time":"2021-02-05T09:20:37Z","timestamp":1612516837000},"page":"1376-1393","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":23,"title":["Towards Balanced Learning for Instance Recognition"],"prefix":"10.1007","volume":"129","author":[{"given":"Jiangmiao","family":"Pang","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kai","family":"Chen","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1672-6362","authenticated-orcid":false,"given":"Qi","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Zhihai","family":"Xu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Huajun","family":"Feng","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Jianping","family":"Shi","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Wanli","family":"Ouyang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Dahua","family":"Lin","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2021,2,5]]},"reference":[{"key":"1434_CR1","doi-asserted-by":"crossref","unstructured":"Cai, Z., Fan, Q., Feris, R. S., & Vasconcelos, N. (2016). A unified multi-scale deep convolutional neural network for fast object detection. In European conference on computer vision.","DOI":"10.1007\/978-3-319-46493-0_22"},{"key":"1434_CR2","doi-asserted-by":"crossref","unstructured":"Cai, Z., & Vasconcelos, N. (2018). Cascade R-CNN: Delving into high quality object detection. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00644"},{"key":"1434_CR3","unstructured":"Cao, K., Wei, C., Gaidon, A., Arechiga, N., & Ma, T. (2019). Learning imbalanced datasets with label-distribution-aware margin loss. In Advances in neural information processing systems (pp. 1567\u20131578)."},{"key":"1434_CR4","doi-asserted-by":"crossref","unstructured":"Chen, K., Pang, J., Wang, J., Xiong, Y., Li, X., Sun, S., Feng, W., Liu, Z., Shi, J., & Ouyang, W., et\u00a0al. (2019). Hybrid task cascade for instance segmentation. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00511"},{"key":"1434_CR5","unstructured":"Chen, K., Wang, J., Pang, J., Cao, Y., Xiong, Y., Li, X., Sun, S., Feng, W., Liu, Z., Xu, J., Zhang, Z., Cheng, D., Zhu, C., Cheng, T., Zhao, Q., Li, B., Lu, X., Zhu, R., Wu, Y., Dai, J., Wang, J., Shi, J., Ouyang, W., Loy, C. C., & Lin, D. (2019). MMDetection: Open MMLab detection toolbox and benchmark. arXiv preprint arXiv:1906.07155."},{"key":"1434_CR6","doi-asserted-by":"crossref","unstructured":"Chen, X., Kundu, K., Zhang, Z., Ma, H., Fidler, S., & Urtasun, R. (2016). Monocular 3d object detection for autonomous driving. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2147\u20132156).","DOI":"10.1109\/CVPR.2016.236"},{"key":"1434_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., Ma, H., Wan, J., Li, B., & Xia, T. (2017). Multi-view 3d object detection network for autonomous driving. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 1907\u20131915).","DOI":"10.1109\/CVPR.2017.691"},{"key":"1434_CR8","doi-asserted-by":"crossref","unstructured":"Cheng, T., Wang, X., Huang, L., & Liu, W. (2020). Boundary-preserving mask R-CNN. In: European conference on computer vision (pp. 660\u2013676), Springer.","DOI":"10.1007\/978-3-030-58568-6_39"},{"key":"1434_CR9","doi-asserted-by":"crossref","unstructured":"Cui, Y., Jia, M., Lin, T.Y., Song, Y., & Belongie, S. (2019). Class-balanced loss based on effective number of samples. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 9268\u20139277).","DOI":"10.1109\/CVPR.2019.00949"},{"key":"1434_CR10","unstructured":"Dai, J., Li, Y., He, K., & Sun, J. (2016). R-FCN: Object detection via region-based fully convolutional networks. In Advances in neural information processing systems."},{"key":"1434_CR11","doi-asserted-by":"crossref","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., & Wei, Y. (2017). Deformable convolutional networks. In Proceedings of the IEEE international conference on computer vision (pp. 764\u2013773).","DOI":"10.1109\/ICCV.2017.89"},{"issue":"2","key":"1434_CR12","doi-asserted-by":"publisher","first-page":"303","DOI":"10.1007\/s11263-009-0275-4","volume":"88","author":"M Everingham","year":"2010","unstructured":"Everingham, M., Van Gool, L., Williams, C. K., Winn, J., & Zisserman, A. (2010). The pascal visual object classes (VOC) challenge. International Journal of Computer Vision, 88(2), 303\u2013338.","journal-title":"International Journal of Computer Vision"},{"key":"1434_CR13","doi-asserted-by":"crossref","unstructured":"Geiger, A., Lenz, P., & Urtasun, R. (2012). Are we ready for autonomous driving? the kitti vision benchmark suite. In 2012 IEEE conference on computer vision and pattern recognition, IEEE (pp. 3354\u20133361).","DOI":"10.1109\/CVPR.2012.6248074"},{"key":"1434_CR14","doi-asserted-by":"crossref","unstructured":"Girshick, R. (2015). Fast R-CNN. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/ICCV.2015.169"},{"key":"1434_CR15","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., & Malik, J. (2014). Rich feature hierarchies for accurate object detection and semantic segmentation. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2014.81"},{"key":"1434_CR16","doi-asserted-by":"crossref","unstructured":"Gupta, A., Dollar, P., & Girshick, R. (2019). LVIS: A dataset for large vocabulary instance segmentation. In: Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 5356\u20135364).","DOI":"10.1109\/CVPR.2019.00550"},{"key":"1434_CR17","doi-asserted-by":"crossref","unstructured":"He, K., Gkioxari, G., Doll\u00e1r, P., & Girshick, R. (2017). Mask R-CNN. In IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2017.322"},{"key":"1434_CR18","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2014). Spatial pyramid pooling in deep convolutional networks for visual recognition. In European conference on computer vision.","DOI":"10.1007\/978-3-319-10578-9_23"},{"issue":"4","key":"1434_CR19","doi-asserted-by":"publisher","first-page":"18","DOI":"10.1109\/5254.708428","volume":"13","author":"MA Hearst","year":"1998","unstructured":"Hearst, M. A., Dumais, S. T., Osuna, E., Platt, J., & Scholkopf, B. (1998). Support vector machines. IEEE Intelligent Systems and their Applications, 13(4), 18\u201328.","journal-title":"IEEE Intelligent Systems and their Applications"},{"key":"1434_CR20","doi-asserted-by":"crossref","unstructured":"Hoiem, D., Chodpathumwan, Y., & Dai, Q. (2012). Diagnosing error in object detectors. In European conference on computer vision (pp. 340\u2013353), Springer.","DOI":"10.1007\/978-3-642-33712-3_25"},{"key":"1434_CR21","doi-asserted-by":"crossref","unstructured":"Hosang, J. H., Benenson, R., & Schiele, B. (2017). Learning non-maximum suppression. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2017.685"},{"key":"1434_CR22","doi-asserted-by":"crossref","unstructured":"Hu, H., Gu, J., Zhang, Z., Dai, J., & Wei, Y. (2018). Relation networks for object detection. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00378"},{"key":"1434_CR23","doi-asserted-by":"crossref","unstructured":"Huang, Z., Huang, L., Gong, Y., Huang, C., & Wang, X. (2019). Mask scoring R-CNN. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 6409\u20136418).","DOI":"10.1109\/CVPR.2019.00657"},{"key":"1434_CR24","unstructured":"Kang, B., Xie, S., Rohrbach, M., Yan, Z., Gordo, A., Feng, J., & Kalantidis, Y. (2019). Decoupling representation and classifier for long-tailed recognition. arXiv preprint arXiv:1910.09217."},{"key":"1434_CR25","doi-asserted-by":"crossref","unstructured":"Kang, K., Ouyang, W., Li, H., & Wang, X. (2016). Object detection from video tubelets with convolutional neural networks. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 817\u2013825).","DOI":"10.1109\/CVPR.2016.95"},{"issue":"8","key":"1434_CR26","doi-asserted-by":"crossref","first-page":"3573","DOI":"10.1109\/TNNLS.2017.2732482","volume":"29","author":"SH Khan","year":"2017","unstructured":"Khan, S. H., Hayat, M., Bennamoun, M., Sohel, F. A., & Togneri, R. (2017). Cost-sensitive learning of deep feature representations from imbalanced data. IEEE Transactions on Neural Networks and Learning Systems, 29(8), 3573\u20133587.","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"1434_CR27","doi-asserted-by":"crossref","unstructured":"Kim, S. W., Kook, H. K., Sun, J. Y., Kang, M. C., & Ko, S. J. (2018). Parallel feature pyramid network for object detection. In Proceedings of the European conference on computer vision (ECCV) (pp. 234\u2013250).","DOI":"10.1007\/978-3-030-01228-1_15"},{"key":"1434_CR28","doi-asserted-by":"crossref","unstructured":"Kong, T., Sun, F., Liu, H., Jiang, Y., Li, L., & Shi, J. (2020). Foveabox: Beyound anchor-based object detection. IEEE Transactions on Image Processing.","DOI":"10.1109\/TIP.2020.3002345"},{"key":"1434_CR29","doi-asserted-by":"crossref","unstructured":"Kong, T., Sun, F., Tan, C., Liu, H., & Huang, W. (2018). Deep feature pyramid reconfiguration for object detection. In European conference on computer vision.","DOI":"10.1007\/978-3-030-01228-1_11"},{"key":"1434_CR30","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (pp. 1097\u20131105)."},{"key":"1434_CR31","doi-asserted-by":"crossref","unstructured":"Law, H., & Deng, J. (2018). Cornernet: Detecting objects as paired keypoints. In European conference on computer vision.","DOI":"10.1007\/978-3-030-01264-9_45"},{"key":"1434_CR32","doi-asserted-by":"crossref","unstructured":"Law, H., & Deng, J. (2019). Cornernet: Detecting objects as paired keypoints.","DOI":"10.1007\/s11263-019-01204-1"},{"key":"1434_CR33","doi-asserted-by":"crossref","unstructured":"Li, B., Liu, Y., & Wang, X. (2019). Gradient harmonized single-stage detector. In Proceedings of the AAAI conference on artificial intelligence (Vol.\u00a033, pp. 8577\u20138584).","DOI":"10.1609\/aaai.v33i01.33018577"},{"issue":"3","key":"1434_CR34","doi-asserted-by":"publisher","first-page":"225","DOI":"10.1007\/s11263-018-1101-7","volume":"127","author":"H Li","year":"2019","unstructured":"Li, H., Liu, Y., Ouyang, W., & Wang, X. (2019). Zoom out-and-in network with map attention decision for region proposal and object detection. International Journal of Computer Vision, 127(3), 225\u2013238.","journal-title":"International Journal of Computer Vision"},{"key":"1434_CR35","doi-asserted-by":"crossref","unstructured":"Li, Y., Wang, T., Kang, B., Tang, S., Wang, C., Li, J., & Feng, J. (2020). Overcoming classifier imbalance for long-tail object detection with balanced group softmax. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 10991\u201311000).","DOI":"10.1109\/CVPR42600.2020.01100"},{"key":"1434_CR36","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Doll\u00e1r, P., Girshick, R. B., He, K., Hariharan, B., & Belongie, S. J. (2017). Feature pyramid networks for object detection. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2017.106"},{"issue":"2","key":"1434_CR37","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1109\/TPAMI.2018.2858826","volume":"42","author":"TY Lin","year":"2018","unstructured":"Lin, T. Y., Goyal, P., Girshick, R., He, K., & Doll\u00e1r, P. (2018). Focal loss for dense object detection. IEEE Transactions on Pattern Analysis and Machine Intelligence, 42(2), 318\u2013327.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1434_CR38","doi-asserted-by":"crossref","unstructured":"Lin, T. Y., Maire, M., Belongie, S., Hays, J., Perona, P., Ramanan, D., Doll\u00e1r, P., & Zitnick, C. L. (2014). Microsoft coco: Common objects in context. In European conference on computer vision.","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"1434_CR39","doi-asserted-by":"publisher","first-page":"261","DOI":"10.1007\/s11263-019-01247-4","volume":"128","author":"L Liu","year":"2019","unstructured":"Liu, L., Ouyang, W., Wang, X., Fieguth, P., Chen, J., Liu, X., et al. (2019). Deep learning for generic object detection: A survey. International Journal of Computer Vision, 128, 261\u2013318.","journal-title":"International Journal of Computer Vision"},{"key":"1434_CR40","doi-asserted-by":"crossref","unstructured":"Liu, S., Qi, L., Qin, H., Shi, J., & Jia, J. (2018). Path aggregation network for instance segmentation. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00913"},{"key":"1434_CR41","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C.Y., & Berg, A.C. (2016). Ssd: Single shot multibox detector. In European conference on computer vision.","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1434_CR42","unstructured":"Luo, W., Xing, J., Milan, A., Zhang, X., Liu, W., Zhao, X., & Kim, T. K. (2014). Multiple object tracking: A literature review. arXiv preprint arXiv:1409.7618."},{"key":"1434_CR43","doi-asserted-by":"crossref","unstructured":"Ouyang, W., Wang, K., Zhu, X., & Wang, X. (2017). Chained cascade network for object detection. In IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2017.214"},{"key":"1434_CR44","doi-asserted-by":"crossref","unstructured":"Pang, J., Chen, K., Shi, J., Feng, H., Ouyang, W., & Lin, D. (2019). Libra R-CNN: Towards balanced learning for object detection. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00091"},{"key":"1434_CR45","unstructured":"Paszke, A., Gross, S., Massa, F., Lerer, A., Bradbury, J., Chanan, G., Killeen, T., Lin, Z., Gimelshein, N., Antiga, L., Desmaison, A., Kopf, A., Yang, E., DeVito, Z., Raison, M., Tejani, A., Chilamkurthy, S., Steiner, B., Fang, L., Bai, J., & Chintala, S. (2019). Pytorch: An imperative style, high-performance deep learning library. In Advances in neural information processing systems."},{"key":"1434_CR46","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., & Farhadi, A. (2016). You only look once: Unified, real-time object detection. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2016.91"},{"key":"1434_CR47","doi-asserted-by":"crossref","unstructured":"Redmon, J., & Farhadi, A. (2017). Yolo9000: Better, faster, stronger. arXiv preprint.","DOI":"10.1109\/CVPR.2017.690"},{"key":"1434_CR48","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster R-CNN: Towards real-time object detection with region proposal networks. In Advances in neural information processing systems."},{"key":"1434_CR49","doi-asserted-by":"crossref","unstructured":"Shen, L., Lin, Z., & Huang, Q. (2016). Relay backpropagation for effective learning of deep convolutional neural networks. In European conference on computer vision (pp. 467\u2013482), Springer.","DOI":"10.1007\/978-3-319-46478-7_29"},{"key":"1434_CR50","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Gupta, A., & Girshick, R. (2016). Training region-based object detectors with online hard example mining. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2016.89"},{"key":"1434_CR51","doi-asserted-by":"crossref","unstructured":"Singh, B., & Davis, L. S. (2018). An analysis of scale invariance in object detection\u2013snip. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2018.00377"},{"key":"1434_CR52","unstructured":"Singh, B., Najibi, M., & Davis, L. S. (2018). SNIPER: Efficient multi-scale training. In Advances in neural information processing systems."},{"key":"1434_CR53","doi-asserted-by":"crossref","unstructured":"Tan, J., Wang, C., Li, B., Li, Q., Ouyang, W., Yin, C., & Yan, J. (2020). Equalization loss for long-tailed object recognition. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 11662\u201311671).","DOI":"10.1109\/CVPR42600.2020.01168"},{"key":"1434_CR54","doi-asserted-by":"crossref","unstructured":"Tian, Z., Shen, C., Chen, H., & He, T. (2019). Fcos: Fully convolutional one-stage object detection. In Proceedings of the IEEE international conference on computer vision (pp. 9627\u20139636).","DOI":"10.1109\/ICCV.2019.00972"},{"issue":"2","key":"1434_CR55","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"104","author":"JR Uijlings","year":"2013","unstructured":"Uijlings, J. R., Van De Sande, K. E., Gevers, T., & Smeulders, A. W. (2013). Selective search for object recognition. International Journal of Computer Vision, 104(2), 154\u2013171.","journal-title":"International Journal of Computer Vision"},{"key":"1434_CR56","doi-asserted-by":"crossref","unstructured":"Wang, J., Chen, K., Yang, S., Loy, C. C., & Lin, D. (2019). Region proposal by guided anchoring. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2965\u20132974).","DOI":"10.1109\/CVPR.2019.00308"},{"key":"1434_CR57","doi-asserted-by":"crossref","unstructured":"Wang, X., Girshick, R., Gupta, A., & He, K. (2017). Non-local neural networks, 10. arXiv preprint arXiv:1711.07971.","DOI":"10.1109\/CVPR.2018.00813"},{"key":"1434_CR58","unstructured":"Wang, Y. X., Ramanan, D., & Hebert, M. (2017). Learning to model the tail. In Advances in neural information processing systems (pp. 7029\u20137039)."},{"key":"1434_CR59","doi-asserted-by":"crossref","unstructured":"Wu, Y., & He, K. (2018). Group normalization. In Proceedings of the European conference on computer vision (ECCV) (pp. 3\u201319).","DOI":"10.1007\/978-3-030-01261-8_1"},{"key":"1434_CR60","doi-asserted-by":"crossref","unstructured":"Wu, Y., Lim, J., & Yang, M. H. (2013). Online object tracking: A benchmark. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 2411\u20132418).","DOI":"10.1109\/CVPR.2013.312"},{"key":"1434_CR61","doi-asserted-by":"crossref","unstructured":"Yang, Z., Liu, S., Hu, H., Wang, L., & Lin, S. (2019). Reppoints: Point set representation for object detection. arXiv preprint arXiv:1904.11490.","DOI":"10.1109\/ICCV.2019.00975"},{"key":"1434_CR62","doi-asserted-by":"crossref","unstructured":"Zeiler, M. D., & Fergus, R. (2014). Visualizing and understanding convolutional networks. In European conference on computer vision.","DOI":"10.1007\/978-3-319-10590-1_53"},{"issue":"9","key":"1434_CR63","doi-asserted-by":"publisher","first-page":"2109","DOI":"10.1109\/TPAMI.2017.2745563","volume":"40","author":"X Zeng","year":"2018","unstructured":"Zeng, X., Ouyang, W., Yan, J., Li, H., Xiao, T., Wang, K., et al. (2018). Crafting GBD-Net for object detection. IEEE Transactions on Pattern Analysis and Machine Intelligence, 40(9), 2109\u20132123.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1434_CR64","doi-asserted-by":"crossref","unstructured":"Zhang, S., Chi, C., Yao, Y., Lei, Z., & Li, S. Z. (2020). Bridging the gap between anchor-based and anchor-free detection via adaptive training sample selection. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (pp. 9759\u20139768).","DOI":"10.1109\/CVPR42600.2020.00978"},{"key":"1434_CR65","doi-asserted-by":"crossref","unstructured":"Zhang, S., Wen, L., Bian, X., Lei, Z., & Li, S. Z. (2017). Single-shot refinement neural network for object detection. arXiv preprint.","DOI":"10.1109\/CVPR.2018.00442"},{"issue":"6\u20137","key":"1434_CR66","doi-asserted-by":"publisher","first-page":"537","DOI":"10.1007\/s11263-019-01159-3","volume":"127","author":"S Zhang","year":"2019","unstructured":"Zhang, S., Wen, L., Shi, H., Lei, Z., Lyu, S., & Li, S. Z. (2019). Single-shot scale-aware network for real-time face detection. International Journal of Computer Vision, 127(6\u20137), 537\u2013559.","journal-title":"International Journal of Computer Vision"},{"key":"1434_CR67","doi-asserted-by":"crossref","unstructured":"Zhao, Q., Sheng, T., Wang, Y., Tang, Z., Chen, Y., Cai, L., & Ling, H. (2019). M2det: A single-shot object detector based on multi-level feature pyramid network. In Proceedings of the AAAI conference on artificial intelligence (Vol.\u00a033, pp. 9259\u20139266).","DOI":"10.1609\/aaai.v33i01.33019259"},{"key":"1434_CR68","unstructured":"Zhou, X., Wang, D., & Kr\u00e4henb\u00fchl, P. (2019). Objects as points. arXiv preprint arXiv:1904.07850."},{"key":"1434_CR69","doi-asserted-by":"crossref","unstructured":"Zhou, X., Zhuo, J., & Krahenbuhl, P. (2019). Bottom-up object detection by grouping extreme and center points. In Proceedings of the IEEE conference on computer vision and pattern recognition (pp. 850\u2013859).","DOI":"10.1109\/CVPR.2019.00094"},{"key":"1434_CR70","doi-asserted-by":"crossref","unstructured":"Zhu, X., Pang, J., Yang, C., Shi, J., & Lin, D. (2019). Adapting object detectors via selective cross-domain alignment. In IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR.2019.00078"},{"key":"1434_CR71","doi-asserted-by":"crossref","unstructured":"Zhu, X., Wang, Y., Dai, J., Yuan, L., & Wei, Y. (2017). Flow-guided feature aggregation for video object detection. In Proceedings of the IEEE international conference on computer vision (pp. 408\u2013417).","DOI":"10.1109\/ICCV.2017.52"},{"key":"1434_CR72","unstructured":"Zou, Z., Shi, Z., Guo, Y., & Ye, J. (2019). Object detection in 20 years: A survey. arXiv preprint arXiv:1905.05055."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-021-01434-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-021-01434-2\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-021-01434-2.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T16:54:40Z","timestamp":1724432080000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-021-01434-2"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2021,2,5]]},"references-count":72,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2021,5]]}},"alternative-id":["1434"],"URL":"https:\/\/doi.org\/10.1007\/s11263-021-01434-2","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2021,2,5]]},"assertion":[{"value":"11 February 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 January 2021","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 February 2021","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}