{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,19]],"date-time":"2026-02-19T16:04:09Z","timestamp":1771517049498,"version":"3.50.1"},"reference-count":60,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2018,6,20]],"date-time":"2018-06-20T00:00:00Z","timestamp":1529452800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2019,3]]},"DOI":"10.1007\/s11263-018-1101-7","type":"journal-article","created":{"date-parts":[[2018,6,20]],"date-time":"2018-06-20T06:40:20Z","timestamp":1529476820000},"page":"225-238","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":71,"title":["Zoom Out-and-In Network with Map Attention Decision for Region Proposal and Object Detection"],"prefix":"10.1007","volume":"127","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-9446-6191","authenticated-orcid":false,"given":"Hongyang","family":"Li","sequence":"first","affiliation":[]},{"given":"Yu","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Wanli","family":"Ouyang","sequence":"additional","affiliation":[]},{"given":"Xiaogang","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2018,6,20]]},"reference":[{"issue":"11","key":"1101_CR1","doi-asserted-by":"publisher","first-page":"2189","DOI":"10.1109\/TPAMI.2012.28","volume":"34","author":"B Alexe","year":"2012","unstructured":"Alexe, B., Deselaers, T., & Ferrari, V. (2012). Measuring the objectness of image windows. IEEE Transactions on Pattern Analysis and Machine Intelligence, 34(11), 2189\u20132202.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"1101_CR2","doi-asserted-by":"crossref","unstructured":"Arbel\u00e1ez, P., Pont-Tuset, J., Barron, J., Marques, F., & Malik, J. (2014). Multiscale combinatorial grouping. In CVPR.","DOI":"10.1109\/CVPR.2014.49"},{"key":"1101_CR3","doi-asserted-by":"crossref","unstructured":"Bell, S., Zitnick, C.\u00a0L., Bala, K., & Girshick, R. (2016). Inside-outside net: Detecting objects in context with skip pooling and recurrent neural networks. In CVPR.","DOI":"10.1109\/CVPR.2016.314"},{"key":"1101_CR4","unstructured":"Chavali, N., Agrawal, H., Mahendru, A., & Batra, D. (2016). Object-proposal evaluation protocol is \u2018gameable\u2019. In: CVPR."},{"key":"1101_CR5","doi-asserted-by":"crossref","unstructured":"Cheng, M., Zhang, Z., Lin, W., & Torr, P. H.\u00a0S. (2014). BING: binarized normed gradients for objectness estimation at 300fps. In CVPR.","DOI":"10.1109\/CVPR.2014.414"},{"key":"1101_CR6","unstructured":"Chi, Z., Li, H., Lu, H., & Yang, M.-H. (2016). Dual deep network for visual tracking. \n                    arXiv:1612.06053\n                    \n                  ."},{"key":"1101_CR7","unstructured":"Dai, J., Li, Y., He, K., & Sun, J. (2016). R-FCN: Object detection via region-based fully convolutional networks. In NIPS."},{"key":"1101_CR8","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., & Wei, Y. (2017). Deformable convolutional networks. \n                    arXiv:1703.06211\n                    \n                  ."},{"key":"1101_CR9","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., & Fei-Fei, L. (2009). ImageNet: A large-scale hierarchical image database. In CVPR.","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1101_CR10","doi-asserted-by":"publisher","first-page":"222","DOI":"10.1109\/TPAMI.2013.122","volume":"36","author":"I Endres","year":"2014","unstructured":"Endres, I., & Hoiem, D. (2014). Category-independent object proposals with diverse ranking. IEEE Transactions on PAMI, 36, 222\u2013234.","journal-title":"IEEE Transactions on PAMI"},{"issue":"1","key":"1101_CR11","doi-asserted-by":"publisher","first-page":"98","DOI":"10.1007\/s11263-014-0733-5","volume":"111","author":"M Everingham","year":"2015","unstructured":"Everingham, M., Eslami, S. M. A., Van Gool, L., Williams, C. K. I., Winn, J., & Zisserman, A. (2015). The pascal visual object classes challenge: A retrospective. International Journal of Computer Vision, 111(1), 98\u2013136.","journal-title":"International Journal of Computer Vision"},{"key":"1101_CR12","unstructured":"Fu, C.-Y., Liu, W., Ranga, A., Tyagi, A., & Berg, A.\u00a0C. (2017). Dssd: Deconvolutional single shot detector. \n                    arXiv:1701.06659\n                    \n                  ."},{"key":"1101_CR13","unstructured":"Ghodrati, A., Diba, A., Pedersoli, M., Tuytelaars, T., & Gool, L.\u00a0V. (2016). DeepProposals: Hunting objects and actions by cascading deep convolutional layers. \n                    arXiv:1606.04702\n                    \n                  ."},{"key":"1101_CR14","doi-asserted-by":"crossref","unstructured":"Gidaris, S., & Komodakis, N. (2016). Attend refine repeat: Active box proposal generation via in-out localization. In BMVC.","DOI":"10.5244\/C.30.90"},{"key":"1101_CR15","doi-asserted-by":"crossref","unstructured":"Girshick, R. (2015). Fast R-CNN. In ICCV.","DOI":"10.1109\/ICCV.2015.169"},{"key":"1101_CR16","doi-asserted-by":"crossref","unstructured":"Girshick, R., Donahue, J., Darrell, T., & Malik, J. (2014). Rich feature hierarchies for accurate object detection and semantic segmentation. In CVPR.","DOI":"10.1109\/CVPR.2014.81"},{"key":"1101_CR17","doi-asserted-by":"publisher","first-page":"471","DOI":"10.1038\/nature20101","volume":"538","author":"A Graves","year":"2016","unstructured":"Graves, A., Wayne, G., Reynolds, M., Harley, T., Danihelka, I., Grabska-Barwi\u0144ska, A., et al. (2016). Hybrid computing using a neural network with dynamic external memory. Nature, 538, 471\u2013476.","journal-title":"Nature"},{"key":"1101_CR18","unstructured":"Hariharan, B., Arbelez, P., Girshick, R., & Malik, J. (2014). Hypercolumns for object segmentation and fine-grained localization. In CVPR."},{"key":"1101_CR19","doi-asserted-by":"crossref","unstructured":"Hayder, Z., He, X., & Salzmann, M. (2016). Learning to co-generate object proposals with a deep structured network. In CVPR.","DOI":"10.1109\/CVPR.2016.281"},{"key":"1101_CR20","unstructured":"He, S. & Lau, R.\u00a0W. (2015). Oriented object proposals. In: ICCV."},{"key":"1101_CR21","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In: CVPR."},{"key":"1101_CR22","doi-asserted-by":"publisher","first-page":"814","DOI":"10.1109\/TPAMI.2015.2465908","volume":"38","author":"J Hosang","year":"2015","unstructured":"Hosang, J., Benenson, R., Doll\u00e1r, P., & Schiele, B. (2015). What makes for effective detection proposals? IEEE Transactions on PAMI, 38, 814\u2013830.","journal-title":"IEEE Transactions on PAMI"},{"key":"1101_CR23","unstructured":"Hu, J., Shen, L., & Sun, G. (2017). Squeeze-and-excitation networks. \n                    arXiv:1709.01507\n                    \n                  ."},{"key":"1101_CR24","doi-asserted-by":"crossref","unstructured":"Huang, J., Rathod, V., Sun, C., Zhu, M., Korattikara, A., Fathi, A., Fischer, I., Wojna, Z., Song, Y., Guadarrama, S., & Murphy, K. (2017). Speed\/accuracy trade-offs for modern convolutional object detectors. In CVPR.","DOI":"10.1109\/CVPR.2017.351"},{"key":"1101_CR25","doi-asserted-by":"crossref","unstructured":"Humayun, A., Li, F., & Rehg, J.\u00a0M. (2014). Rigor: Reusing inference in graph cuts for generating object regions. In CVPR.","DOI":"10.1109\/CVPR.2014.50"},{"key":"1101_CR26","unstructured":"Ioffe, S., & Szegedy, C. (2015). Batch normalization: Accelerating deep network training by reducing internal covariate shift. In ICML."},{"key":"1101_CR27","doi-asserted-by":"crossref","unstructured":"Jia, Y., Shelhamer, E., Donahue, J., Karayev, S., Long, J., Girshick, R., Guadarrama, S., & Darrell, T. (2014). Caffe: Convolutional architecture for fast feature embedding. In ACM Multimedia.","DOI":"10.1145\/2647868.2654889"},{"key":"1101_CR28","doi-asserted-by":"publisher","first-page":"4525","DOI":"10.1109\/TIP.2016.2593342","volume":"25","author":"Z Jie","year":"2016","unstructured":"Jie, Z., Liang, X., Feng, J., Lu, W. F., Tay, E. H. F., & Yan, S. (2016). Scale-aware pixelwise object proposal networks. IEEE Transactions on Image Processing, 25, 4525\u20134539.","journal-title":"IEEE Transactions on Image Processing"},{"key":"1101_CR29","unstructured":"Kaiming, H., Xiangyu, Z., Shaoqing, R., & Sun, J. (2014). Spatial pyramid pooling in deep convolutional networks for visual recognition. In ECCV."},{"key":"1101_CR30","doi-asserted-by":"crossref","unstructured":"Kong, T., Yao, A., Chen, Y., & Sun, F. (2016). Hypernet: Towards accurate region proposal generation and joint object detection. In CVPR.","DOI":"10.1109\/CVPR.2016.98"},{"key":"1101_CR31","doi-asserted-by":"crossref","unstructured":"Krahenbuhl, P., & Koltun, V. (2014). Geodesic object proposals. In ECCV.","DOI":"10.1007\/978-3-319-10602-1_47"},{"key":"1101_CR32","doi-asserted-by":"crossref","unstructured":"Krahenbuhl, P., & Koltun, V. (2015). Learning to propose objects. In CVPR.","DOI":"10.1109\/CVPR.2015.7298765"},{"key":"1101_CR33","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G.\u00a0E. (2012). Imagenet classification with deep convolutional neural networks. In NIPS, (pp. 1106\u20131114)."},{"key":"1101_CR34","doi-asserted-by":"crossref","unstructured":"Kuo, W., Hariharan, B., & Malik, J. (2015). DeepBox: Learning objectness with convolutional networks. In ICCV.","DOI":"10.1109\/ICCV.2015.285"},{"key":"1101_CR35","unstructured":"Li, H., Liu, Y., Ouyang, W., & Wang, X. (2017a). Zoom out-and-in network with recursive training for object proposal. \n                    arXiv:1702.05711\n                    \n                  ."},{"key":"1101_CR36","doi-asserted-by":"crossref","unstructured":"Li, H., Liu, Y., Zhang, X., An, Z., Wang, J., Chen, Y., & Tong, J. (2017b). Do we really need more training data for object localization. In IEEE international conference on image processing. IEEE.","DOI":"10.1109\/ICIP.2017.8296386"},{"key":"1101_CR37","unstructured":"Li, H., Ouyang, W., & Wang, X. (2016). Multi-bias non-linear activation in deep neural networks. In ICML."},{"key":"1101_CR38","doi-asserted-by":"crossref","unstructured":"Lin, T.-Y., Dollar, P., Girshick, R., He, K., Hariharan, B., & Belongie, S. (2017). Feature pyramid networks for object detection. In CVPR.","DOI":"10.1109\/CVPR.2017.106"},{"key":"1101_CR39","unstructured":"Lin, T.-Y., Maire, M., Belongie, S., Bourdev, L., Girshick, R., Hays, J., Perona, P., Ramanan, D., Zitnick, C.\u00a0L., & Dollar, P. (2014). Microsoft COCO: Common objects in context. arXiv preprint:1405.0312."},{"key":"1101_CR40","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., & Reed, S. (2016). SSD: Single shot multibox detector. In ECCV.","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1101_CR41","unstructured":"Liu, Y., Li, H., & Wang, X. (2017a). Learning deep features via congenerous cosine loss for person recognition. \n                    arXiv:1702.06890\n                    \n                  ."},{"key":"1101_CR42","doi-asserted-by":"crossref","unstructured":"Liu, Y., Li, H., Yan, J., Wei, F., Wang, X., & Tang, X. (2017b). Recurrent scale approximation for object detection in cnn. In IEEE international conference on computer vision.","DOI":"10.1109\/ICCV.2017.69"},{"key":"1101_CR43","doi-asserted-by":"crossref","unstructured":"Long, J., Shelhamer, E., & Darrell, T. (2015). Fully convolutional networks for semantic segmentation. In CVPR.","DOI":"10.1109\/CVPR.2015.7298965"},{"key":"1101_CR44","doi-asserted-by":"crossref","unstructured":"Man\u00e9n, S., Guillaumin, M., & Van\u00a0Gool, L. (2013). Prime object proposals with randomized Prim\u2019s algorithm. In ICCV.","DOI":"10.1109\/ICCV.2013.315"},{"key":"1101_CR45","doi-asserted-by":"crossref","unstructured":"Newell, A., Yang, K., & Deng, J. (2016). Stacked hourglass networks for human pose estimation. In ECCV.","DOI":"10.1007\/978-3-319-46484-8_29"},{"key":"1101_CR46","doi-asserted-by":"crossref","unstructured":"Noh, H., Hong, S., & Han, B. (2015). Learning deconvolution network for semantic segmentation. In ICCV.","DOI":"10.1109\/ICCV.2015.178"},{"key":"1101_CR47","unstructured":"Pinheiro, P.\u00a0O., Collobert, R., & Dollar, P. (2015). Learning to segment object candidates. In NIPS."},{"key":"1101_CR48","doi-asserted-by":"crossref","unstructured":"Pinheiro, P.\u00a0O., Lin, T.-Y., Collobert, R., & Dollr, P. (2016). Learning to refine object segments. In ECCV.","DOI":"10.1007\/978-3-319-46448-0_5"},{"key":"1101_CR49","doi-asserted-by":"crossref","unstructured":"Pont-Tuset, J., & Gool, L.\u00a0V. (2015). Boosting object proposals: From pascal to coco. In CVPR.","DOI":"10.1109\/ICCV.2015.181"},{"key":"1101_CR50","doi-asserted-by":"crossref","unstructured":"Redmon, J., Divvala, S., Girshick, R., & Farhadi, A. (2016). You only look once: Unified, real-time object detection. In CVPR.","DOI":"10.1109\/CVPR.2016.91"},{"key":"1101_CR51","unstructured":"Redmon, J., & Farhadi, A. (2016). Yolo9000: Better, faster, stronger. \n                    arXiv:1612.08242\n                    \n                  ."},{"key":"1101_CR52","unstructured":"Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In NIPS."},{"key":"1101_CR53","unstructured":"Ronneberger, O., Fischer, P., & Brox, T. (2015). U-net: Convolutional networks for biomedical image segmentation. \n                    arXiv:1505.04597\n                    \n                  ."},{"key":"1101_CR54","unstructured":"Sermanet, P., Eigen, D., Zhang, X., Mathieu, M., Fergus, R., & LeCun, Y. (2014). Overfeat: Integrated recognition, localization and detection using convolutional networks. In ICLR."},{"key":"1101_CR55","unstructured":"Simonyan, K., & Zisserman, A. (2015). Very deep convolutional networks for large-scale image recognition. In International Conference on Learning Representations."},{"key":"1101_CR56","doi-asserted-by":"crossref","unstructured":"Sun, C., Paluri, M., Collobert, R., Nevatia, R., & Bourdev, L. (2016). ProNet: Learning to propose object-specific boxes for cascaded neural networks. In CVPR.","DOI":"10.1109\/CVPR.2016.379"},{"key":"1101_CR57","doi-asserted-by":"publisher","first-page":"154","DOI":"10.1007\/s11263-013-0620-5","volume":"10","author":"J Uijlings","year":"2013","unstructured":"Uijlings, J., van de Sande, K., Gevers, T., & Smeulders, A. (2013). Selective search for object recognition. International Journal of Computer Vision, 10, 154\u2013171.","journal-title":"International Journal of Computer Vision"},{"key":"1101_CR58","doi-asserted-by":"crossref","unstructured":"Wang, X., Shrivastava, A., & Gupta, A. (2017). A-fast-rcnn: Hard positive generation via adversary for object detection. In CVPR.","DOI":"10.1109\/CVPR.2017.324"},{"key":"1101_CR59","doi-asserted-by":"crossref","unstructured":"Wen, Y., Zhang, K., Li, Z., & Qiao, Y. (2016). A discriminative feature learning approach for deep face recognition. In ECCV.","DOI":"10.1007\/978-3-319-46478-7_31"},{"key":"1101_CR60","doi-asserted-by":"crossref","unstructured":"Zitnick, L., & Dollar, P. (2014). Edge Boxes: Locating object proposals from edges. In ECCV.","DOI":"10.1007\/978-3-319-10602-1_26"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-018-1101-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-018-1101-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-018-1101-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2019,6,19]],"date-time":"2019-06-19T19:08:33Z","timestamp":1560971313000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-018-1101-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2018,6,20]]},"references-count":60,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2019,3]]}},"alternative-id":["1101"],"URL":"https:\/\/doi.org\/10.1007\/s11263-018-1101-7","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2018,6,20]]},"assertion":[{"value":"16 July 2017","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 June 2018","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 June 2018","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}