{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,10]],"date-time":"2026-06-10T18:38:33Z","timestamp":1781116713887,"version":"3.54.1"},"reference-count":79,"publisher":"Springer Science and Business Media LLC","issue":"6-7","license":[{"start":{"date-parts":[[2019,3,2]],"date-time":"2019-03-02T00:00:00Z","timestamp":1551484800000},"content-version":"tdm","delay-in-days":0,"URL":"http:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2019,6]]},"DOI":"10.1007\/s11263-019-01157-5","type":"journal-article","created":{"date-parts":[[2019,3,2]],"date-time":"2019-03-02T12:17:05Z","timestamp":1551529025000},"page":"560-578","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":28,"title":["Hierarchical Attention for Part-Aware Face Detection"],"prefix":"10.1007","volume":"127","author":[{"given":"Shuzhe","family":"Wu","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Meina","family":"Kan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Shiguang","family":"Shan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xilin","family":"Chen","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2019,3,2]]},"reference":[{"key":"1157_CR1","doi-asserted-by":"crossref","unstructured":"Alahi, A., Ortiz, R., & Vandergheynst, P. (2012). FREAK: Fast retina keypoint. In The IEEE conference on computer vision and pattern recognition (CVPR), pp. 510\u2013517.","DOI":"10.1109\/CVPR.2012.6247715"},{"key":"1157_CR2","unstructured":"Alexe, B., Heess, N., Teh, Y. W., & Ferrari, V. (2012). Searching for objects driven by context. In Advances in neural information processing systems (NIPS), pp. 881\u2013889."},{"key":"1157_CR3","unstructured":"Ba, J. L., Mnih, V., & Kavukcuoglu, K. (2015). Multiple object recognition with visual attention. In International conference on learning representations (ICLR)."},{"key":"1157_CR4","doi-asserted-by":"crossref","unstructured":"Caicedo, J. C., & Lazebnik, S. (2015). Active object localization with deep reinforcement learning. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.286"},{"key":"1157_CR5","doi-asserted-by":"crossref","unstructured":"Chen, D., Ren, S., Wei, Y., Cao, X., & Sun, J. (2014). Joint cascade face detection and alignment. In European conference on compute vision (ECCV), pp. 109\u2013122.","DOI":"10.1007\/978-3-319-10599-4_8"},{"key":"1157_CR6","doi-asserted-by":"crossref","unstructured":"Chen, D., Hua, G., Wen, F., & Sun, J. (2016). Supervised transformer network for efficient face detection. In European conference on compute vision (ECCV), pp. 122\u2013138.","DOI":"10.1007\/978-3-319-46454-1_8"},{"key":"1157_CR7","doi-asserted-by":"crossref","unstructured":"Chen, L., Zhang, H., Xiao, J., Nie, L., Shao, J., Liu, W., Chua, T. S. (2017a). SCA-CNN: Spatial and channel-wise attention in convolutional networks for image captioning. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.667"},{"key":"1157_CR8","doi-asserted-by":"crossref","unstructured":"Chen, Y., Song, L., & He, R. (2017b). Masquer hunter: Adversarial occlusion-aware face detection. arXiv:1709.05188","DOI":"10.1109\/BTAS.2018.8698572"},{"key":"1157_CR9","unstructured":"Dai, J., Li, Y., He, K., & Sun, J. (2016). R-FCN: Object detection via region-based fully convolutional networks. In Advances in neural information processing systems (NIPS), pp. 379\u2013387."},{"key":"1157_CR10","doi-asserted-by":"crossref","unstructured":"Dai, J., Qi, H., Xiong, Y., Li, Y., Zhang, G., Hu, H., & Wei, Y. (2017). Deformable convolutional networks. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.89"},{"key":"1157_CR11","doi-asserted-by":"crossref","unstructured":"Ding, H., Zhou, H., Zhou, S. K., & Chellappa, R. (2018). A deep cascade network for unaligned face attribute classification. In The thirty-second AAAI conference on artificial intelligence (AAAI-18).","DOI":"10.1609\/aaai.v32i1.12303"},{"key":"1157_CR12","doi-asserted-by":"crossref","unstructured":"Farfade, S. S., Saberian, M., & Li, L. J. (2015). Multi-view face detection using deep convolutional neural networks. In International conference on multimedia retrieval (ICMR).","DOI":"10.1145\/2671188.2749408"},{"issue":"9","key":"1157_CR13","doi-asserted-by":"publisher","first-page":"1627","DOI":"10.1109\/TPAMI.2009.167","volume":"32","author":"PF Felzenszwalb","year":"2010","unstructured":"Felzenszwalb, P. F., Girshick, R. B., McAllester, D., & Ramanan, D. (2010). Object detection with discriminatively trained part-based models. IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 32(9), 1627\u20131645.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"},{"key":"1157_CR14","doi-asserted-by":"crossref","unstructured":"Fu, J., Zheng, H., & Mei, T (2017) Look closer to see better: Recurrent attention convolutional neural network for fine-grained image recognition. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.476"},{"key":"1157_CR15","doi-asserted-by":"crossref","unstructured":"Girshick, R. (2015). Fast R-CNN. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.169"},{"key":"1157_CR16","first-page":"1462","volume":"37","author":"K Gregor","year":"2015","unstructured":"Gregor, K., Danihelka, I., Graves, A., Rezende, D., & Wierstra, D. (2015). Draw: A recurrent neural network for image generation. International Conference on Machine Learning (ICML), 37, 1462\u20131471.","journal-title":"International Conference on Machine Learning (ICML)"},{"key":"1157_CR17","doi-asserted-by":"crossref","unstructured":"Hao, Z., Liu, Y., Qin, H., Yan, J., Li, X., Hu, X. (2017). Scale-aware face detection. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.207"},{"key":"1157_CR18","unstructured":"Hara, K., Liu, M. Y., Tuzel, O., Farahmand, A. M. (2017). Attentional network for visual object detection. CoRR. arXiv:1702.01478"},{"key":"1157_CR19","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In The IEEE conference on computer vision and pattern recognition (CVPR), pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"1157_CR20","doi-asserted-by":"crossref","unstructured":"He, P., Huang, W., He, T., Zhu, Q., Qiao, Y., & Li, X. (2017). Single shot text detector with regional attention. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.331"},{"issue":"8","key":"1157_CR21","doi-asserted-by":"publisher","first-page":"1735","DOI":"10.1162\/neco.1997.9.8.1735","volume":"9","author":"S Hochreiter","year":"1997","unstructured":"Hochreiter, S., & Schmidhuber, J. (1997). Long short-term memory. Neural Computation, 9(8), 1735\u20131780.","journal-title":"Neural Computation"},{"key":"1157_CR22","doi-asserted-by":"crossref","unstructured":"Hoiem, D., Chodpathumwan, Y., & Dai, Q. (2012). Diagnosing error in object detectors. In European conference on compute vision (ECCV).","DOI":"10.1007\/978-3-642-33712-3_25"},{"key":"1157_CR23","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., & Sun, G. (2018). Squeeze-and-excitation networks. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1157_CR24","doi-asserted-by":"crossref","unstructured":"Hu, P., & Ramanan, D. (2017). Finding tiny faces. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.166"},{"key":"1157_CR25","doi-asserted-by":"crossref","unstructured":"Huang, C., Ai, H., Li, Y., & Lao, S. (2006). Learning sparse features in granular space for multi-view face detection. In The IEEE international conference on automatic face gesture recognition (FG), pp. 401\u2013406.","DOI":"10.1109\/FGR.2006.70"},{"key":"1157_CR26","unstructured":"Jain, V., Learned-Miller, E. (2010). FDDB: A benchmark for face detection in unconstrained settings. Technical report UM-CS-2010-009, University of Massachusetts, Amherst."},{"key":"1157_CR27","doi-asserted-by":"crossref","unstructured":"Jia, Y., Shelhamer, E., Donahue, J., Karayev, S., Long, J., Girshick, R., Guadarrama, S., & Darrell, T. (2014). Caffe: Convolutional architecture for fast feature embedding. In ACM international conference on multimedia (MM), pp. 675\u2013678.","DOI":"10.1145\/2647868.2654889"},{"key":"1157_CR28","doi-asserted-by":"crossref","unstructured":"Jiang, H., & Learned-Miller, E. (2017). Face detection with the Faster R-CNN. In The IEEE international conference on automatic face gesture recognition (FG), pp. 650\u2013657.","DOI":"10.1109\/FG.2017.82"},{"key":"1157_CR29","unstructured":"Jie, Z., Liang, X., Feng, J., Jin, X., Lu, W., & Yan, S. (2016). Tree-structured reinforcement learning for sequential object localization. In Advances in neural information processing systems (NIPS), pp. 127\u2013135."},{"key":"1157_CR30","doi-asserted-by":"crossref","unstructured":"Le, V., Brandt, J., Lin, Z., Bourdev, L., & Huang, T. S. (2012). Interactive facial feature localization. In European conference on compute vision (ECCV), pp. 679\u2013692.","DOI":"10.1007\/978-3-642-33712-3_49"},{"key":"1157_CR31","doi-asserted-by":"crossref","unstructured":"Leutenegger, S., Chli, M., & Siegwart, R. Y. (2011). BRISK: Binary robust invariant scalable keypoints. In The IEEE international conference on computer vision (ICCV), pp. 2548\u20132555.","DOI":"10.1109\/ICCV.2011.6126542"},{"key":"1157_CR32","doi-asserted-by":"crossref","unstructured":"Li, H., Lin, Z., Shen, X., Brandt, J., & Hua, G. (2015). A convolutional neural network cascade for face detection. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2015.7299170"},{"key":"1157_CR33","doi-asserted-by":"crossref","unstructured":"Li, H., Liu, Y., Ouyang, W., & Wang, X. (2017a). Zoom out-and-in network with map attention decision for region proposal and object detection. CoRR. arXiv:1709.04347","DOI":"10.1007\/s11263-018-1101-7"},{"key":"1157_CR34","doi-asserted-by":"crossref","unstructured":"Li, J., & Zhang, Y. (2013). Learning SURF cascade for fast and accurate object detection. In The IEEE conference on computer vision and pattern recognition (CVPR), pp. 3468\u20133475.","DOI":"10.1109\/CVPR.2013.445"},{"issue":"5","key":"1157_CR35","doi-asserted-by":"publisher","first-page":"944","DOI":"10.1109\/TMM.2016.2642789","volume":"19","author":"J Li","year":"2017","unstructured":"Li, J., Wei, Y., Liang, X., Dong, J., Xu, T., Feng, J., et al. (2017b). Attentive contexts for object detection. IEEE Transactions on Multimedia (TMM), 19(5), 944\u2013954.","journal-title":"IEEE Transactions on Multimedia (TMM)"},{"key":"1157_CR36","doi-asserted-by":"crossref","unstructured":"Li, Y., Sun, B., Wu, T., & Wang, Y. (2016). Face detection with end-to-end integration of a convnet and a 3D model. In European conference on compute vision (ECCV), pp. 420\u2013436.","DOI":"10.1007\/978-3-319-46487-9_26"},{"key":"1157_CR37","doi-asserted-by":"publisher","first-page":"900","DOI":"10.1109\/ICIP.2002.1038171","volume":"1","author":"R Lienhart","year":"2002","unstructured":"Lienhart, R., & Maydt, J. (2002). An extended set of haar-like features for rapid object detection. International Conference on Image Processing (ICIP), 1, 900\u2013903.","journal-title":"International Conference on Image Processing (ICIP)"},{"key":"1157_CR38","unstructured":"Liu, C., & Shum, H. Y. (2003). Kullback-leibler boosting. In IEEE conference on computer vision and pattern recognition (CVPR), pp. 587\u2013594."},{"key":"1157_CR39","doi-asserted-by":"crossref","unstructured":"Liu, W., Anguelov, D., Erhan, D., Szegedy, C., Reed, S., Fu, C. Y., & Berg, A. C. (2016). SSD: Single shot multibox detector. In European conference on compute vision (ECCV), pp. 21\u201337.","DOI":"10.1007\/978-3-319-46448-0_2"},{"key":"1157_CR40","doi-asserted-by":"crossref","unstructured":"Liu, Y., Li, H., Yan, J., Wei, F., Wang, X., & Tang, X. (2017). Recurrent scale approximation for object detection in CNN. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.69"},{"key":"1157_CR41","doi-asserted-by":"crossref","unstructured":"Mathe, S., Pirinen, A., & Sminchisescu, C. (2016). Reinforcement learning for visual object detection. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.316"},{"key":"1157_CR42","doi-asserted-by":"crossref","unstructured":"Mathias, M., Benenson, R., Pedersoli, M., Van\u00a0Gool, L. (2014), Face detection without bells and whistles. In European conference on compute vision (ECCV), pp. 720\u2013735.","DOI":"10.1007\/978-3-319-10593-2_47"},{"key":"1157_CR43","doi-asserted-by":"crossref","unstructured":"Nada, H., Sindagi, V., Zhang, H., & Patel, V. M. (2018). Pushing the limits of unconstrained face detection: A challenge dataset and baseline results. CoRR. arXiv:1804.10275","DOI":"10.1109\/BTAS.2018.8698561"},{"key":"1157_CR44","doi-asserted-by":"crossref","unstructured":"Najibi, M., Samangouei, P., Chellappa, R., & Davis, L. S. (2017). SSH: Single stage headless face detector. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.522"},{"key":"1157_CR45","unstructured":"Osadchy, M., Miller, M. L., & Cun, Y. L. (2005). Synergistic face detection and pose estimation with energy-based models. In Advances in neural information processing systems, pp. 1017\u20131024."},{"key":"1157_CR46","unstructured":"Osadchy, M., Miller, M. L., & Cun, Y. L. (2005). Synergistic face detection and pose estimation with energy-based models. In Advances in neural information processing systems, pp. 1017\u20131024."},{"issue":"3","key":"1157_CR47","doi-asserted-by":"publisher","first-page":"211","DOI":"10.1007\/s11263-015-0816-y","volume":"115","author":"O Russakovsky","year":"2015","unstructured":"Russakovsky, O., Deng, J., Su, H., Krause, J., Satheesh, S., Ma, S., et al. (2015). ImageNet large scale visual recognition challenge. International Journal of Computer Vision, 115(3), 211\u2013252.","journal-title":"International Journal of Computer Vision"},{"key":"1157_CR48","doi-asserted-by":"crossref","unstructured":"Sagonas, C., Tzimiropoulos, G., Zafeiriou, S., & Pantic, M. (2013). A semi-automatic methodology for facial landmark annotation. In The IEEE conference on computer vision and pattern recognition (CVPR) workshops.","DOI":"10.1109\/CVPRW.2013.132"},{"key":"1157_CR49","doi-asserted-by":"crossref","unstructured":"Shih, K. J., Singh, S., & Hoiem, D. (2016). Where to look: Focus regions for visual question answering. In The IEEE conference on computer vision and pattern recognition (CVPR), pp. 4613\u20134621.","DOI":"10.1109\/CVPR.2016.499"},{"key":"1157_CR50","doi-asserted-by":"crossref","unstructured":"Shrivastava, A., Gupta, A., & Girshick, R. (2016). Training region-based object detectors with online hard example mining. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.89"},{"key":"1157_CR51","unstructured":"Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. CoRR. arXiv:1409.1556"},{"key":"1157_CR52","doi-asserted-by":"crossref","unstructured":"Triantafyllidou, D., & Tefas, A. (2017). A fast deep convolutional neural network for face detection in big visual data. In INNS conference on big data, pp. 61\u201370.","DOI":"10.1007\/978-3-319-47898-2_7"},{"issue":"4","key":"1157_CR53","doi-asserted-by":"publisher","first-page":"245","DOI":"10.1049\/ip-vis:19941301","volume":"141","author":"R Vaillant","year":"1994","unstructured":"Vaillant, R., Monrocq, C., & Cun, Y. L. (1994). Original approach for the localisation of objects in images (ip-vis). IEE Proceedings - Vision, Image and Signal Processing, 141(4), 245\u2013250.","journal-title":"IEE Proceedings - Vision, Image and Signal Processing"},{"issue":"2","key":"1157_CR54","doi-asserted-by":"publisher","first-page":"137","DOI":"10.1023\/B:VISI.0000013087.49260.fb","volume":"57","author":"P Viola","year":"2004","unstructured":"Viola, P., & Jones, M. J. (2004). Robust real-time face detection. International Journal of Computer Vision (IJCV), 57(2), 137\u2013154.","journal-title":"International Journal of Computer Vision (IJCV)"},{"key":"1157_CR55","unstructured":"Wang, H., Li, Z., Ji, X., & Wang, Y. (2017a). Face R-CNN. CoRR. arXiv:1706.01061"},{"key":"1157_CR56","unstructured":"Wang, Y., Ji, X., Zhou, Z., Wang, H., & Li, Z. (2017b). Detecting faces using region-based fully convolutional networks. CoRR. arXiv:1709.05256"},{"key":"1157_CR57","doi-asserted-by":"crossref","unstructured":"Wang, Z., Chen, T., Li, G., Xu, R., & Lin, L. (2017c). Multi-label image recognition by recurrently discovering attentional regions. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.58"},{"key":"1157_CR58","doi-asserted-by":"crossref","unstructured":"Wen, Y., Zhang, K., Li, Z., & Qiao, Y. (2016). A discriminative feature learning approach for deep face recognition. In European conference on compute vision (ECCV), pp. 499\u2013515.","DOI":"10.1007\/978-3-319-46478-7_31"},{"key":"1157_CR59","doi-asserted-by":"publisher","first-page":"138","DOI":"10.1016\/j.neucom.2016.09.072","volume":"221","author":"S Wu","year":"2017","unstructured":"Wu, S., Kan, M., He, Z., Shan, S., & Chen, X. (2017). Funnel-structured cascade for multi-view face detection with alignment-awareness. Neurocomputing, 221, 138\u2013145.","journal-title":"Neurocomputing"},{"key":"1157_CR60","unstructured":"Xu, K., Ba, J., Kiros, R., Cho, K., Courville, A., Salakhudinov, R., Zemel, R., & Bengio, Y. (2015). Show, attend and tell: Neural image caption generation with visual attention. In International conference on machine learning (ICML), pp. 2048\u20132057."},{"key":"1157_CR61","doi-asserted-by":"crossref","unstructured":"Yan, J., Lei, Z., Wen, L., & Li, S. Z. (2014). The fastest deformable part model for object detection. In IEEE conference on computer vision and pattern recognition (CVPR), pp. 2497\u20132504.","DOI":"10.1109\/CVPR.2014.320"},{"key":"1157_CR62","unstructured":"Yang, B., Yan, J., Lei, Z., & Li, S. Z. (2014). Aggregate channel features for multi-view face detection. In The IEEE international joint conference on biometrics (IJCB), pp. 1\u20138."},{"key":"1157_CR63","doi-asserted-by":"crossref","unstructured":"Yang, S., Luo, P., Loy, C. C., & Tang, X. (2015). From facial parts responses to face detection: A deep learning approach. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2015.419"},{"key":"1157_CR64","doi-asserted-by":"crossref","unstructured":"Yang, S., Luo, P., Loy, C. C., & Tang, X. (2016a). WIDER FACE: A face detection benchmark. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2016.596"},{"key":"1157_CR65","unstructured":"Yang, S., Xiong, Y., Loy, C. C., & Tang, X. (2017). Face detection through scale-friendly deep convolutional networks. CoRR. arXiv:1706.02863"},{"key":"1157_CR66","doi-asserted-by":"crossref","unstructured":"Yang, Z., He, X., Gao, J., Deng, L., & Smola, A. (2016b). Stacked attention networks for image question answering. In The IEEE conference on computer vision and pattern recognition (CVPR), pp. 21\u201329.","DOI":"10.1109\/CVPR.2016.10"},{"key":"1157_CR67","doi-asserted-by":"crossref","unstructured":"Ye, Q., Yuan, S., & Kim, T. K. (2016). Spatial attention deep net with partial pso for hierarchical hybrid hand pose estimation. In European conference on compute vision (ECCV), pp. 346\u2013361.","DOI":"10.1007\/978-3-319-46484-8_21"},{"key":"1157_CR68","doi-asserted-by":"crossref","unstructured":"Yu, D., Fu, J., Mei, T., & Rui, Y. (2017). Multi-level attention networks for visual question answering. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2017.446"},{"key":"1157_CR69","doi-asserted-by":"crossref","unstructured":"Yu, J., Jiang, Y., Wang, Z., Cao, Z., & Huang, T. (2016). UnitBox: An advanced object detection network. In ACM on multimedia conference (MM), pp. 516\u2013520.","DOI":"10.1145\/2964284.2967274"},{"key":"1157_CR70","doi-asserted-by":"crossref","unstructured":"Zafeiriou, S., Trigeorgis, G., Chrysos, G., Deng, J., & Shen, J. (2017). The Menpo facial landmark localisation challenge: A step towards the solution. In The IEEE conference on computer vision and pattern recognition (CVPR) workshops.","DOI":"10.1109\/CVPRW.2017.263"},{"key":"1157_CR71","unstructured":"Zaremba, W., & Sutskever, I. (2014). Learning to execute. CoRR. arXiv:1410.4615"},{"key":"1157_CR72","doi-asserted-by":"crossref","unstructured":"Zhang, C., Zhang, Z. (2014). Improving multiview face detection with multi-task deep convolutional neural networks. In The IEEE winter conference on applications of computer vision (WACV), pp. 1036\u20131041.","DOI":"10.1109\/WACV.2014.6835990"},{"issue":"10","key":"1157_CR73","doi-asserted-by":"publisher","first-page":"1499","DOI":"10.1109\/LSP.2016.2603342","volume":"23","author":"K Zhang","year":"2016","unstructured":"Zhang, K., Zhang, Z., Li, Z., & Qiao, Y. (2016). Joint face detection and alignment using multitask cascaded convolutional networks. IEEE Signal Processing Letters (LSP), 23(10), 1499\u20131503.","journal-title":"IEEE Signal Processing Letters (LSP)"},{"key":"1157_CR74","doi-asserted-by":"crossref","unstructured":"Zhang, S., Zhu, X., Lei, Z., Shi, H., Wang, X., & Li, S. Z. (2017a). FaceBoxes: A cpu real-time face detector with high accuracy. In The IEEE\/IAPR international joint conference on biometrics (IJCB).","DOI":"10.1109\/BTAS.2017.8272675"},{"key":"1157_CR75","doi-asserted-by":"crossref","unstructured":"Zhang, S., Zhu, X., Lei, Z., Shi, H., Wang, X., & Li, S. Z. (2017b) $$\\text{S}^3$$ S 3 FD: Single shot scale-invariant face detector. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.30"},{"key":"1157_CR76","doi-asserted-by":"crossref","unstructured":"Zhang, S., Yang, J., & Schiele, B. (2018). Occluded pedestrian detection through guided attention in cnns. In The IEEE conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2018.00731"},{"key":"1157_CR77","doi-asserted-by":"crossref","unstructured":"Zheng, H., Fu, J., Mei, T., & Luo, J. (2017). Learning multi-attention convolutional neural network for fine-grained image recognition. In The IEEE international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2017.557"},{"key":"1157_CR78","unstructured":"Zhu, C., Zheng, Y., Luu, K., & Savvides, M. (2017). CMS-RCNN: Contextual multi-scale region-based CNN for unconstrained face detection. In B. Bhanu & A. Kumar (eds.), Deep learning for biometrics (pp. 57\u201379). Cham: Springer."},{"key":"1157_CR79","unstructured":"Zhu, X., & Ramanan, D. (2012). Face detection, pose estimation, and landmark localization in the wild. In IEEE conference on computer vision and pattern recognition (CVPR), pp. 2879\u20132886."}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01157-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/article\/10.1007\/s11263-019-01157-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"http:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-019-01157-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,7,15]],"date-time":"2024-07-15T09:37:56Z","timestamp":1721036276000},"score":1,"resource":{"primary":{"URL":"http:\/\/link.springer.com\/10.1007\/s11263-019-01157-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2019,3,2]]},"references-count":79,"journal-issue":{"issue":"6-7","published-print":{"date-parts":[[2019,6]]}},"alternative-id":["1157"],"URL":"https:\/\/doi.org\/10.1007\/s11263-019-01157-5","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2019,3,2]]},"assertion":[{"value":"15 February 2018","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"29 January 2019","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"2 March 2019","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}