{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T16:50:06Z","timestamp":1781974206149,"version":"3.54.5"},"reference-count":87,"publisher":"Springer Science and Business Media LLC","issue":"4","license":[{"start":{"date-parts":[[2022,3,5]],"date-time":"2022-03-05T00:00:00Z","timestamp":1646438400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,3,5]],"date-time":"2022-03-05T00:00:00Z","timestamp":1646438400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2022,4]]},"DOI":"10.1007\/s11263-022-01591-y","type":"journal-article","created":{"date-parts":[[2022,3,5]],"date-time":"2022-03-05T07:02:48Z","timestamp":1646463768000},"page":"1088-1106","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":22,"title":["Visual Attention Consistency for Human Attribute Recognition"],"prefix":"10.1007","volume":"130","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-6677-5202","authenticated-orcid":false,"given":"Hao","family":"Guo","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Xiaochuan","family":"Fan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4152-5295","authenticated-orcid":false,"given":"Song","family":"Wang","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2022,3,5]]},"reference":[{"key":"1591_CR1","doi-asserted-by":"crossref","unstructured":"Bansal, N., Agarwal, C., & Nguyen, A. (2020). SAM: The sensitivity of attribution methods to hyperparameters. In IEEE conference on computer vision and pattern recognition (pp. 8673\u20138683).","DOI":"10.1109\/CVPR42600.2020.00870"},{"key":"1591_CR2","doi-asserted-by":"crossref","unstructured":"Bourdev, L., Maji, S., & Malik, J. (2011). Describing people: A poselet-based approach to attribute classification. In IEEE international conference on computer vision (pp. 1543\u20131550). IEEE.","DOI":"10.1109\/ICCV.2011.6126413"},{"key":"1591_CR3","unstructured":"Cohen, T., & Welling, M. (2016). Group equivariant convolutional networks. In International conference on machine learning (pp. 2990\u20132999)."},{"issue":"19","key":"1591_CR4","doi-asserted-by":"publisher","first-page":"R850","DOI":"10.1016\/j.cub.2004.09.041","volume":"14","author":"CE Connor","year":"2004","unstructured":"Connor, C. E., Egeth, H. E., & Yantis, S. (2004). Visual attention: Bottom-up versus top-down. Current Biology, 14(19), R850\u2013R852.","journal-title":"Current Biology"},{"key":"1591_CR5","unstructured":"Dabkowski, P., & Gal, Y. (2017). Real time image saliency for black box classifiers. In Advances in neural information processing systems (pp. 6967\u20136976)."},{"key":"1591_CR6","unstructured":"Dalal, N., & Triggs, B. (2005). Histograms of oriented gradients for human detection."},{"key":"1591_CR7","doi-asserted-by":"crossref","unstructured":"Deng, Y., Luo, P., Loy, C.C., & Tang, X. (2014). Pedestrian attribute recognition at far distance. In ACM International conference on multimedia (pp. 789\u2013792). ACM.","DOI":"10.1145\/2647868.2654966"},{"issue":"1","key":"1591_CR8","doi-asserted-by":"publisher","first-page":"193","DOI":"10.1146\/annurev.ne.18.030195.001205","volume":"18","author":"R Desimone","year":"1995","unstructured":"Desimone, R., & Duncan, J. (1995). Neural mechanisms of selective visual attention. Annual Review of Neuroscience, 18(1), 193\u2013222.","journal-title":"Annual Review of Neuroscience"},{"key":"1591_CR9","unstructured":"Dieleman, S., De Fauw, J., & Kavukcuoglu, K. (2016). Exploiting cyclic symmetry in convolutional neural networks. arXiv preprint arXiv:1602.02660"},{"issue":"2","key":"1591_CR10","doi-asserted-by":"publisher","first-page":"201","DOI":"10.3758\/BF03212870","volume":"12","author":"CW Eriksen","year":"1972","unstructured":"Eriksen, C. W., & Hoffman, J. E. (1972). Temporal and spatial characteristics of selective encoding from visual displays. Perception & Psychophysics, 12(2), 201\u2013204.","journal-title":"Perception & Psychophysics"},{"key":"1591_CR11","doi-asserted-by":"crossref","unstructured":"Feris, R., Bobbitt, R., Brown, L., & Pankanti, S. (2014). Attribute-based people search: Lessons learnt from a practical surveillance system. In International conference on multimedia retrieval (pp. 153\u2013160).","DOI":"10.1145\/2578726.2578732"},{"key":"1591_CR12","doi-asserted-by":"crossref","unstructured":"Fong, R. C., & Vedaldi, A. (2017). Interpretable explanations of black boxes by meaningful perturbation. In IEEE international conference on computer vision (pp. 3429\u20133437).","DOI":"10.1109\/ICCV.2017.371"},{"key":"1591_CR13","doi-asserted-by":"crossref","unstructured":"Gkioxari, G., Girshick, R., & Malik, J. (2015). Contextual action recognition with r* CNN. In IEEE international conference on computer vision (pp. 1080\u20131088).","DOI":"10.1109\/ICCV.2015.129"},{"key":"1591_CR14","doi-asserted-by":"publisher","first-page":"38","DOI":"10.1016\/j.patrec.2017.05.012","volume":"94","author":"H Guo","year":"2017","unstructured":"Guo, H., Fan, X., & Wang, S. (2017). Human attribute recognition by refining attention heat map. Pattern Recognition Letters, 94, 38\u201345.","journal-title":"Pattern Recognition Letters"},{"key":"1591_CR15","doi-asserted-by":"crossref","unstructured":"Guo, H., Zheng, K., Fan, X., Yu, H., & Wang, S. (2019). Visual attention consistency under image transforms for multi-label image classification. In IEEE conference on computer vision and pattern recognition (pp. 729\u2013739).","DOI":"10.1109\/CVPR.2019.00082"},{"key":"1591_CR16","unstructured":"Han, B., Yao, Q., Yu, X., Niu, G., Xu, M., Hu, W., Tsang, I., & Sugiyama, M. (2018). Co-teaching: Robust training of deep neural networks with extremely noisy labels. In Advances in neural information processing Systems (pp. 8527\u20138537)."},{"key":"1591_CR17","doi-asserted-by":"crossref","unstructured":"Han, K., Guo, J., Zhang, C., & Zhu, M. (2018). Attribute-aware attention model for fine-grained representation learning. In ACM international conference on multimedia (pp. 2040\u20132048).","DOI":"10.1145\/3240508.3240550"},{"key":"1591_CR18","doi-asserted-by":"crossref","unstructured":"Han, K., Wang, Y., Shu, H., Liu, C., Xu, C., & Xu, C. (2019). Attribute aware pooling for pedestrian attribute recognition. arXiv preprint arXiv:1907.11837","DOI":"10.24963\/ijcai.2019\/341"},{"key":"1591_CR19","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016) Deep residual learning for image recognition. In IEEE conference on computer vision and pattern recognition (pp. 770\u2013778).","DOI":"10.1109\/CVPR.2016.90"},{"key":"1591_CR20","unstructured":"Hinton, G., Vinyals, O., & Dean, J. (2015). Distilling the knowledge in a neural network. arXiv preprint arXiv:1503.02531"},{"key":"1591_CR21","doi-asserted-by":"crossref","unstructured":"Hinton, G. E., Krizhevsky, A., & Wang, S. D. (2011). Transforming auto-encoders. International conference on artificial neural networks (pp. 44\u201351). Springer.","DOI":"10.1007\/978-3-642-21735-7_6"},{"key":"1591_CR22","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., & Sun, G. (2017). Squeeze-and-excitation networks, 7. arXiv preprint arXiv:1709.01507","DOI":"10.1109\/CVPR.2018.00745"},{"key":"1591_CR23","doi-asserted-by":"crossref","unstructured":"Huang, G., Liu, Z., Weinberger, K.Q., & van der Maaten, L. (2017). Densely connected convolutional networks. In IEEE conference on computer vision and pattern recognition (Vol. 1, p. 3).","DOI":"10.1109\/CVPR.2017.243"},{"key":"1591_CR24","unstructured":"Jaderberg, M., Simonyan, K., Zisserman, A., & Kavukcuoglu, K. (2015). Spatial transformer networks. In Advances in neural information processing systems (pp. 2017\u20132025)."},{"key":"1591_CR25","doi-asserted-by":"crossref","unstructured":"Kivinen, J. J., & Williams, C. K. (2011). Transformation equivariant Boltzmann machines. International conference on artificial neural networks (pp. 1\u20139). Springer.","DOI":"10.1007\/978-3-642-21735-7_1"},{"key":"1591_CR26","doi-asserted-by":"crossref","unstructured":"Koch, C., & Ullman, S. (1987). Shifts in selective visual attention: Towards the underlying neural circuitry. Matters of intelligence (pp. 115\u2013141). Springer.","DOI":"10.1007\/978-94-009-3833-5_5"},{"issue":"14","key":"1591_CR27","doi-asserted-by":"publisher","first-page":"1428","DOI":"10.1016\/j.cub.2006.05.056","volume":"16","author":"K Koch","year":"2006","unstructured":"Koch, K., McLean, J., Segev, R., Freed, M. A., Berry, M. J., II., Balasubramanian, V., & Sterling, P. (2006). How much the eye tells the brain. Current Biology, 16(14), 1428\u20131434.","journal-title":"Current Biology"},{"key":"1591_CR28","unstructured":"Krizhevsky, A., Sutskever, I., & Hinton, G.E. (2012). Imagenet classification with deep convolutional neural networks. In Advances in neural information processing systems (pp. 1097\u20131105)."},{"issue":"2","key":"1591_CR29","doi-asserted-by":"publisher","first-page":"75","DOI":"10.1016\/j.tics.2004.12.004","volume":"9","author":"N Lavie","year":"2005","unstructured":"Lavie, N. (2005). Distracted and confused?: Selective attention under load. Trends in Cognitive Sciences, 9(2), 75\u201382.","journal-title":"Trends in Cognitive Sciences"},{"key":"1591_CR30","doi-asserted-by":"crossref","unstructured":"Lenc, K., & Vedaldi, A. (2015). Understanding image representations by measuring their equivariance and equivalence. In IEEE conference on computer vision and pattern recognition (pp. 991\u2013999).","DOI":"10.1109\/CVPR.2015.7298701"},{"key":"1591_CR31","doi-asserted-by":"crossref","unstructured":"Lenc, K., & Vedaldi, A. (2016). Learning covariant feature detectors. European conference on computer vision (pp. 100\u2013117). Springer.","DOI":"10.1007\/978-3-319-49409-8_11"},{"key":"1591_CR32","doi-asserted-by":"crossref","unstructured":"Li, D., Chen, X., & Huang, K. (2015). Multi-attribute learning for pedestrian attribute recognition in surveillance scenarios. In Asian conference on pattern recognition (pp. 111\u2013115). IEEE.","DOI":"10.1109\/ACPR.2015.7486476"},{"key":"1591_CR33","doi-asserted-by":"crossref","unstructured":"Li, D., Chen, X., Zhang, Z., & Huang, K. (2018). Pose guided deep model for pedestrian attribute recognition in surveillance scenarios. In International conference on multimedia and expo (pp. 1\u20136). IEEE.","DOI":"10.1109\/ICME.2018.8486604"},{"key":"1591_CR34","unstructured":"Li, D., Zhang, Z., Chen, X., Ling, H., & Huang, K. (2016). A richly annotated dataset for pedestrian attribute recognition. arXiv preprint arXiv:1603.07054"},{"key":"1591_CR35","doi-asserted-by":"crossref","unstructured":"Li, Q., Zhao, X., He, R., & Huang, K. (2019). Visual-semantic graph reasoning for pedestrian attribute recognition. In AAAI conference on artificial intelligence (Vol. 33, pp. 8634\u20138641).","DOI":"10.1609\/aaai.v33i01.33018634"},{"key":"1591_CR36","doi-asserted-by":"crossref","unstructured":"Li, Y., Huang, C., Loy, C. C., & Tang, X. (2016). Human attribute recognition by deep hierarchical contexts. In European conference on computer vision (pp. 684\u2013700). Springer.","DOI":"10.1007\/978-3-319-46466-4_41"},{"key":"1591_CR37","doi-asserted-by":"publisher","first-page":"151","DOI":"10.1016\/j.patcog.2019.06.006","volume":"95","author":"Y Lin","year":"2019","unstructured":"Lin, Y., Zheng, L., Zheng, Z., Wu, Y., Hu, Z., Yan, C., & Yang, Y. (2019). Improving person re-identification by attribute and identity learning. Pattern Recognition, 95, 151\u2013161.","journal-title":"Pattern Recognition"},{"key":"1591_CR38","unstructured":"Liu, P., Liu, X., Yan, J., & Shao, J. (2018). Localization guided learning for pedestrian attribute recognition. arXiv preprint arXiv:1808.09102"},{"key":"1591_CR39","doi-asserted-by":"crossref","unstructured":"Liu, X., Zhao, H., Tian, M., Sheng, L., Shao, J., Yan, J., & Wang, X. (2017). Hydraplus-net: Attentive deep features for pedestrian analysis. In IEEE international conference on computer vision (pp. 1\u20139).","DOI":"10.1109\/ICCV.2017.46"},{"key":"1591_CR40","unstructured":"Malach, E., & Shalev-Shwartz, S. (2017). Decoupling \u201cwhen to update\u201d from \u201chow to update\u201d. In Advances in neural information processing systems (pp. 960\u2013970)."},{"key":"1591_CR41","doi-asserted-by":"crossref","unstructured":"Marcos, D., Volpi, M., Komodakis, N., & Tuia, D. (2017). Rotation equivariant vector field networks. In IEEE international conference on computer vision (pp. 5048\u20135057).","DOI":"10.1109\/ICCV.2017.540"},{"issue":"4715","key":"1591_CR42","doi-asserted-by":"publisher","first-page":"782","DOI":"10.1126\/science.4023713","volume":"229","author":"J Moran","year":"1985","unstructured":"Moran, J., & Desimone, R. (1985). Selective attention gates visual processing in the extrastriate cortex. Science, 229(4715), 782\u2013784.","journal-title":"Science"},{"key":"1591_CR43","unstructured":"M\u00fcller, R., Kornblith, S., & Hinton, G. E. (2019). When does label smoothing help? In Advances in neural information processing systems (pp. 4694\u20134703)."},{"key":"1591_CR44","unstructured":"Niu, X., Han, H., Shan, S., & Chen, X. (2019). Multi-label co-regularization for semi-supervised facial action unit recognition. In Advances in neural information processing systems (pp. 909\u2013919)."},{"key":"1591_CR45","doi-asserted-by":"crossref","unstructured":"Oquab, M., Bottou, L., Laptev, I., & Sivic, J. (2015). Is object localization for free?-weakly-supervised learning with convolutional neural networks. In IEEE conference on computer vision and pattern recognition (pp. 685\u2013694).","DOI":"10.1109\/CVPR.2015.7298668"},{"key":"1591_CR46","doi-asserted-by":"crossref","unstructured":"Qiao, S., Shen, W., Zhang, Z., Wang, B., & Yuille, A. (2018). Deep co-training for semi-supervised image recognition. In European conference on computer vision (pp. 135\u2013152).","DOI":"10.1007\/978-3-030-01267-0_9"},{"key":"1591_CR47","unstructured":"Ravanbakhsh, S., Schneider, J., & Poczos, B. (2017). Equivariance through parameter-sharing. In International conference on machine learning (pp. 2892\u20132901). JMLR.org."},{"key":"1591_CR48","doi-asserted-by":"crossref","unstructured":"Ribeiro, M. T., Singh, S., & Guestrin, C. (2016). \u201cWhy should I trust you?\u201d explaining the predictions of any classifier. In ACM SIGKDD international conference on knowledge discovery and data mining (pp. 1135\u20131144).","DOI":"10.1145\/2939672.2939778"},{"key":"1591_CR49","doi-asserted-by":"crossref","unstructured":"Ribeiro, M. T., Singh, S., & Guestrin, C. (2018). Anchors: High-precision model-agnostic explanations. In AAAI conference on artificial intelligence.","DOI":"10.1609\/aaai.v32i1.11491"},{"key":"1591_CR50","doi-asserted-by":"crossref","unstructured":"Sarafianos, N., Xu, X., & Kakadiaris, I. A. (2018). Deep imbalanced attribute classification using visual attention aggregation. arXiv preprint arXiv:1807.03903","DOI":"10.1007\/978-3-030-01252-6_42"},{"key":"1591_CR51","unstructured":"Sarfraz, M. S., Schumann, A., Wang, Y., & Stiefelhagen, R. (2017). Deep view-sensitive pedestrian attribute inference in an end-to-end model. arXiv preprint arXiv:1707.06089"},{"key":"1591_CR52","doi-asserted-by":"crossref","unstructured":"Schmidt, U., & Roth, S. (2012). Learning rotation-aware features: From invariant priors to equivariant descriptors. In IEEE conference on computer vision and pattern recognition (pp. 2050\u20132057). IEEE.","DOI":"10.1109\/CVPR.2012.6247909"},{"key":"1591_CR53","doi-asserted-by":"crossref","unstructured":"Selvaraju, R. R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., & Batra, D. (2017). Grad-cam: Visual explanations from deep networks via gradient-based localization. In IEEE international conference on computer vision (pp. 618\u2013626).","DOI":"10.1109\/ICCV.2017.74"},{"key":"1591_CR54","unstructured":"Shrikumar, A., Greenside, P., & Kundaje, A. (2017). Learning important features through propagating activation differences. arXiv preprint arXiv:1704.02685"},{"key":"1591_CR55","unstructured":"Simonyan, K., Vedaldi, A., & Zisserman, A. (2013). Deep inside convolutional networks: Visualising image classification models and saliency maps. arXiv preprint arXiv:1312.6034"},{"key":"1591_CR56","unstructured":"Simonyan, K., & Zisserman, A. (2014). Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556"},{"key":"1591_CR57","doi-asserted-by":"crossref","unstructured":"Su, C., Zhang, S., Xing, J., Gao, W., & Tian, Q. (2016). Deep attributes driven multi-camera person re-identification. In European conference on computer vision (pp. 475\u2013491). Springer.","DOI":"10.1007\/978-3-319-46475-6_30"},{"key":"1591_CR58","doi-asserted-by":"crossref","unstructured":"Sudowe, P., Spitzer, H., & Leibe, B. (2015). Person attribute recognition with a jointly-trained holistic CNN model. In IEEE international conference on computer vision workshops (pp. 87\u201395).","DOI":"10.1109\/ICCVW.2015.51"},{"key":"1591_CR59","doi-asserted-by":"crossref","unstructured":"Sun, G., Khan, S., Li, W., Cholakkal, H., Khan, F., & Van Gool, L. (2020). Fixing localization errors to improve image classification. In European conference on computer vision.","DOI":"10.1007\/978-3-030-58595-2_17"},{"key":"1591_CR60","unstructured":"Sundararajan, M., Taly, A., & Yan, Q. (2017). Axiomatic attribution for deep networks. arXiv preprint arXiv:1703.01365"},{"key":"1591_CR61","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Liu, W., Jia, Y., Sermanet, P., Reed, S., Anguelov, D., Erhan, D., Vanhoucke, V., & Rabinovich, A. (2015). Going deeper with convolutions. In IEEE conference on computer vision and pattern recognition. IEEE.","DOI":"10.1109\/CVPR.2015.7298594"},{"key":"1591_CR62","doi-asserted-by":"crossref","unstructured":"Tan, Z., Yang, Y., Wan, J., Guo, G., & Li, S. Z. (2020). Relation-aware pedestrian attribute recognition with graph convolutional networks. In AAAI conference on artificial intelligence (pp. 12055\u201312062).","DOI":"10.1609\/aaai.v34i07.6883"},{"issue":"12","key":"1591_CR63","doi-asserted-by":"publisher","first-page":"6126","DOI":"10.1109\/TIP.2019.2919199","volume":"28","author":"Z Tan","year":"2019","unstructured":"Tan, Z., Yang, Y., Wan, J., Hang, H., Guo, G., & Li, S. Z. (2019). Attention-based pedestrian attribute analysis. IEEE Transactions on Image Processing, 28(12), 6126\u20136140.","journal-title":"IEEE Transactions on Image Processing"},{"key":"1591_CR64","doi-asserted-by":"crossref","unstructured":"Tang, C., Sheng, L., Zhang, Z., & Hu, X. (2019). Improving pedestrian attribute recognition with weakly-supervised multi-scale attribute-specific localization. In IEEE international conference on computer vision (pp. 4997\u20135006).","DOI":"10.1109\/ICCV.2019.00510"},{"key":"1591_CR65","unstructured":"Tarvainen, A., & Valpola, H. (2017). Mean teachers are better role models: Weight-averaged consistency targets improve semi-supervised deep learning results. In Advances in neural information processing systems (pp. 1195\u20131204)."},{"key":"1591_CR66","unstructured":"Thewlis, J., Bilen, H., & Vedaldi, A. (2017). Unsupervised learning of object frames by dense equivariant image labelling. In Advances in neural information processing systems (pp. 844\u2013855)."},{"key":"1591_CR67","doi-asserted-by":"crossref","unstructured":"Thewlis, J., Bilen, H., & Vedaldi, A. (2017). Unsupervised learning of object landmarks by factorized spatial embeddings. In IEEE international conference on computer vision (pp. 5916\u20135925).","DOI":"10.1109\/ICCV.2017.348"},{"key":"1591_CR68","doi-asserted-by":"crossref","unstructured":"Tian, Y., Luo, P., Wang, X., & Tang, X. (2015). Pedestrian detection aided by deep learning semantic tasks. In IEEE conference on computer vision and pattern recognition (pp. 5079\u20135087).","DOI":"10.1109\/CVPR.2015.7299143"},{"issue":"1","key":"1591_CR69","doi-asserted-by":"publisher","first-page":"97","DOI":"10.1016\/0010-0285(80)90005-5","volume":"12","author":"AM Treisman","year":"1980","unstructured":"Treisman, A. M., & Gelade, G. (1980). A feature-integration theory of attention. Cognitive Psychology, 12(1), 97\u2013136.","journal-title":"Cognitive Psychology"},{"key":"1591_CR70","doi-asserted-by":"crossref","unstructured":"Wang, F., Jiang, M., Qian, C., Yang, S., Li, C., Zhang, H., Wang, X., & Tang, X. (2017). Residual attention network for image classification. In IEEE conference on computer vision and pattern recognition (pp. 3156\u20133164).","DOI":"10.1109\/CVPR.2017.683"},{"key":"1591_CR71","doi-asserted-by":"crossref","unstructured":"Wang, J., Yang, Y., Mao, J., Huang, Z., Huang, C., & Xu, W. (2016). CNN-RNN: A unified framework for multi-label image classification. In IEEE conference on computer vision and pattern recognition (pp. 2285\u20132294). IEEE.","DOI":"10.1109\/CVPR.2016.251"},{"key":"1591_CR72","doi-asserted-by":"crossref","unstructured":"Wang, J., Zhu, X., Gong, S., & Li, W. (2017). Attribute recognition by joint recurrent learning of context and correlation. In IEEE international conference on computer vision (pp. 531\u2013540).","DOI":"10.1109\/ICCV.2017.65"},{"key":"1591_CR73","unstructured":"Wang, X., Zheng, S., Yang, R., Luo, B., & Tang, J. (2019). Pedestrian attribute recognition: A survey. arXiv preprint arXiv:1901.07474"},{"key":"1591_CR74","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J. Y., & So Kweon, I. (2018). CBAM: Convolutional block attention module. In European conference on computer vision (pp. 3\u201319).","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"1591_CR75","doi-asserted-by":"crossref","unstructured":"Worrall, D., & Brostow, G. (2018). CubeNet: Equivariance to 3d rotation and translation. In European conference on computer vision (pp. 567\u2013584).","DOI":"10.1007\/978-3-030-01228-1_35"},{"key":"1591_CR76","doi-asserted-by":"crossref","unstructured":"Worrall, D. E., Garbin, S. J., Turmukhambetov, D., & Brostow, G. J. (2017). Harmonic networks: Deep translation and rotation equivariance. In IEEE conference on computer vision and pattern recognition (pp. 5028\u20135037).","DOI":"10.1109\/CVPR.2017.758"},{"key":"1591_CR77","doi-asserted-by":"crossref","unstructured":"Wu, M., Huang, D., Guo, Y., & Wang, Y. (2020). Distraction-aware feature learning for human attribute recognition via coarse-to-fine attention mechanism. In AAAI conference on artificial intelligence (pp. 12394\u201312401).","DOI":"10.1609\/aaai.v34i07.6925"},{"key":"1591_CR78","unstructured":"Zagoruyko, S., & Komodakis, N. (2016). Paying more attention to attention: Improving the performance of convolutional neural networks via attention transfer. arXiv preprint arXiv:1612.03928"},{"key":"1591_CR79","doi-asserted-by":"crossref","unstructured":"Zeiler, M. D., & Fergus, R. (2014). Visualizing and understanding convolutional networks. In European conference on computer vision (pp. 818\u2013833). Springer.","DOI":"10.1007\/978-3-319-10590-1_53"},{"key":"1591_CR80","doi-asserted-by":"crossref","unstructured":"Zhang, N., Paluri, M., Ranzato, M., Darrell, T., & Bourdev, L. (2014). Panda: Pose aligned networks for deep attribute modeling. In IEEE conference on computer vision and pattern recognition (pp. 1637\u20131644).","DOI":"10.1109\/CVPR.2014.212"},{"key":"1591_CR81","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Xiang, T., Hospedales, T. M., & Lu, H. (2018). Deep mutual learning. In IEEE conference on computer vision and pattern recognition (pp. 4320\u20134328).","DOI":"10.1109\/CVPR.2018.00454"},{"key":"1591_CR82","doi-asserted-by":"crossref","unstructured":"Zhao, X., Sang, L., Ding, G., Guo, Y., & Jin, X. (2018). Grouping attribute recognition for pedestrian with joint recurrent learning. In International joint conferences on artificial intelligence (pp. 3177\u20133183).","DOI":"10.24963\/ijcai.2018\/441"},{"key":"1591_CR83","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., & Torralba, A. (2016). Learning deep features for discriminative localization. In IEEE conference on computer vision and pattern recognition (pp. 2921\u20132929). IEEE.","DOI":"10.1109\/CVPR.2016.319"},{"issue":"1\u20132","key":"1591_CR84","doi-asserted-by":"publisher","first-page":"239","DOI":"10.1016\/S0004-3702(02)00190-X","volume":"137","author":"ZH Zhou","year":"2002","unstructured":"Zhou, Z. H., Wu, J., & Tang, W. (2002). Ensembling neural networks: Many could be better than all. Artificial Intelligence, 137(1\u20132), 239\u2013263.","journal-title":"Artificial Intelligence"},{"key":"1591_CR85","doi-asserted-by":"crossref","unstructured":"Zhu, F., Li, H., Ouyang, W., Yu, N., & Wang, X. (2017). Learning spatial regularization with image-level supervisions for multi-label image classification. In IEEE conference on computer vision and pattern recognition (pp. 5513\u20135522).","DOI":"10.1109\/CVPR.2017.219"},{"key":"1591_CR86","doi-asserted-by":"crossref","unstructured":"Zhu, J., Liao, S., Lei, Z., Yi, D., & Li, S. (2013). Pedestrian attribute classification in surveillance: Database and evaluation. In IEEE international conference on computer vision workshops (pp. 331\u2013338).","DOI":"10.1109\/ICCVW.2013.51"},{"key":"1591_CR87","unstructured":"Zintgraf, L. M., Cohen, T. S., Adel, T., & Welling, M. (2017). Visualizing deep neural network decisions: Prediction difference analysis. arXiv preprint arXiv:1702.04595"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-022-01591-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-022-01591-y\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-022-01591-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,1,28]],"date-time":"2023-01-28T12:25:25Z","timestamp":1674908725000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-022-01591-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,3,5]]},"references-count":87,"journal-issue":{"issue":"4","published-print":{"date-parts":[[2022,4]]}},"alternative-id":["1591"],"URL":"https:\/\/doi.org\/10.1007\/s11263-022-01591-y","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2022,3,5]]},"assertion":[{"value":"6 March 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 February 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"5 March 2022","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}