{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,18]],"date-time":"2026-01-18T03:57:52Z","timestamp":1768708672382,"version":"3.49.0"},"reference-count":62,"publisher":"Springer Science and Business Media LLC","issue":"7","license":[{"start":{"date-parts":[[2025,3,18]],"date-time":"2025-03-18T00:00:00Z","timestamp":1742256000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,18]],"date-time":"2025-03-18T00:00:00Z","timestamp":1742256000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"name":"\u201dPioneer\u201d and \u201dLeading Goose\u201d R&D Program of Zhejiang","award":["2023C01030"],"award-info":[{"award-number":["2023C01030"]}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62122011"],"award-info":[{"award-number":["62122011"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["U21A20514"],"award-info":[{"award-number":["U21A20514"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,7]]},"DOI":"10.1007\/s11263-025-02405-7","type":"journal-article","created":{"date-parts":[[2025,3,18]],"date-time":"2025-03-18T20:09:25Z","timestamp":1742328565000},"page":"4712-4726","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A Solution to Co-occurrence Bias in Pedestrian Attribute Recognition: Theory, Algorithms, and Improvements"],"prefix":"10.1007","volume":"133","author":[{"given":"Yibo","family":"Zhou","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-6811-9209","authenticated-orcid":false,"given":"Hai-Miao","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Jinzuo","family":"Yu","sequence":"additional","affiliation":[]},{"given":"Haotian","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Shiliang","family":"Pu","sequence":"additional","affiliation":[]},{"given":"Hanzi","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,3,18]]},"reference":[{"key":"2405_CR1","unstructured":"Belghazi, M. I., Baratin, A., Rajeswar, S., Ozair, S., Bengio, Y., Courville, A., & Hjelm, R. D. (2018). Mine: Mutual information neural estimation. arXiv preprint arXiv:1801.04062."},{"key":"2405_CR2","unstructured":"Chen, X., Duan, Y., Houthooft, R., Schulman, J., Sutskever, I., & Abbeel, P. (2016). Infogan: Interpretable representation learning by information maximizing generative adversarial nets. Advances in Neural Information Processing Systems, 29."},{"key":"2405_CR3","doi-asserted-by":"crossref","unstructured":"Chen, Z., Li, A., & Wang, Y. (2019). A temporal attentive approach for video-based pedestrian attribute recognition. In Pattern recognition and computer vision: second Chinese conference, PRCV 2019, Xi\u2019an, China, November 8\u201311, 2019, Proceedings, Part II 2, Springer, pp. 209\u2013220.","DOI":"10.1007\/978-3-030-31723-2_18"},{"key":"2405_CR4","doi-asserted-by":"crossref","unstructured":"Deng, Y., Luo, P., Loy, C. C., & Tang, X. (2014). Pedestrian attribute recognition at far distance. In proceedings of the 22nd ACM international conference on multimedia, pp. 789\u2013792.","DOI":"10.1145\/2647868.2654966"},{"key":"2405_CR5","unstructured":"Do, K., & Tran, T. (2019). Theory and evaluation metrics for learning disentangled representations. arXiv preprint arXiv:1908.09961."},{"key":"2405_CR6","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1002\/cpa.3160280102","volume":"28","author":"MD Donsker","year":"1975","unstructured":"Donsker, M. D., & Varadhan, S. R. S. (1975). Asymptotic evaluation of certain Markov process expectations for large time, I. Communications on Pure and Applied Mathematics, 28, 1.","journal-title":"Communications on Pure and Applied Mathematics"},{"key":"2405_CR7","doi-asserted-by":"crossref","unstructured":"Fabbri, M., Calderara, S., & Cucchiara, R. (2017). Generative adversarial models for people attribute recognition in surveillance. In 2017 14th IEEE international conference on advanced video and signal based surveillance (AVSS), IEEE, pp. 1\u20136.","DOI":"10.1109\/AVSS.2017.8078521"},{"key":"2405_CR8","first-page":"1","volume":"99","author":"H Fan","year":"2020","unstructured":"Fan, H., Hu, H. M., Liu, S., Lu, W., & Pu, S. (2020). Correlation graph convolutional network for pedestrian attribute recognition. IEEE Transactions on Multimedia, 99, 1\u20131.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2405_CR9","doi-asserted-by":"publisher","first-page":"411","DOI":"10.1109\/TCSVT.2023.3285411","volume":"34","author":"X Fan","year":"2023","unstructured":"Fan, X., Zhang, Y., Lu, Y., & Wang, H. (2023). Parformer: Transformer-based multi-task network for pedestrian attribute recognition. IEEE Transactions on Circuits and Systems for Video Technology, 34, 411\u2013423.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"2405_CR10","doi-asserted-by":"publisher","first-page":"1340","DOI":"10.1145\/3343031.3351003","volume":"19","author":"L Gao","year":"2019","unstructured":"Gao, L., Huang, D., Guo, Y., & Wang, Y. (2019). Pedestrian attribute recognition via hierarchical multi-task learning and relationship attention. Association for Computing Machinery MM, 19, 1340\u20131348. https:\/\/doi.org\/10.1145\/3343031.3351003","journal-title":"Association for Computing Machinery MM"},{"key":"2405_CR11","doi-asserted-by":"crossref","unstructured":"Guo, H., Zheng, K., Fan, X., Yu, H., & Wang, S. (2020). Visual attention consistency under image transforms for multi-label image classification. In 2019 IEEE\/CVF conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPR.2019.00082"},{"key":"2405_CR12","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., & Sun, J. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"2405_CR13","unstructured":"Higgins, I., Matthey, L., Pal, A., Burgess, C., Glorot, X., Botvinick, M., Mohamed, S., & Lerchner, A. (2017). Beta-vae: Learning basic visual concepts with a constrained variational framework. In International conference on learning representations."},{"key":"2405_CR14","doi-asserted-by":"crossref","unstructured":"Jeon, I., Lee, W., Pyeon, M., & Kim, G. (2021). Ib-gan: Disentangled representation learning with information bottleneck generative adversarial networks. In Proceedings of the AAAI conference on artificial intelligence, vol. 35, pp. 7926\u20137934.","DOI":"10.1609\/aaai.v35i9.16967"},{"key":"2405_CR15","doi-asserted-by":"crossref","unstructured":"Jia, J., Chen, X., & Huang, K. (2021a). Spatial and semantic consistency regularizations for pedestrian attribute recognition. In Proceedings of the IEEE\/CVF international conference on computer vision (ICCV), pp. 962\u2013971.","DOI":"10.1109\/ICCV48922.2021.00100"},{"key":"2405_CR16","unstructured":"Jia, J., Huang, H., Chen, X., & Huang, K. (2021b). Rethinking of pedestrian attribute recognition: A reliable evaluation under zero-shot pedestrian identity setting. arXiv preprint arXiv:2107.03576."},{"key":"2405_CR17","first-page":"1069","volume-title":"Learning disentangled attribute representations for robust pedestrian attribute recognition","author":"J Jia","year":"2022","unstructured":"Jia, J., Gao, N., He, F., Chen, X., & Huang, K. (2022). Learning disentangled attribute representations for robust pedestrian attribute recognition (pp. 1069\u20131077). AAAI Press."},{"issue":"2","key":"2405_CR18","doi-asserted-by":"publisher","first-page":"170","DOI":"10.1016\/j.patrec.2020.07.018","volume":"138","author":"Z Ji","year":"2020","unstructured":"Ji, Z., Hu, Z., He, E., Han, J., & Pang, Y. (2020). Pedestrian attribute recognition based on multiple time steps attention. Pattern Recognition Letters, 138(2), 170\u2013176.","journal-title":"Pattern Recognition Letters"},{"key":"2405_CR19","doi-asserted-by":"crossref","unstructured":"Jin, J., Wang, X., Zhu, Q., Wang, H., & Li, C. (2024). Pedestrian attribute recognition: A new benchmark dataset and a large language model augmented framework. arXiv preprint arXiv:2408.09720.","DOI":"10.1609\/aaai.v39i4.32434"},{"key":"2405_CR20","unstructured":"Kauffmann, J., Ruff, L., Montavon, G., & M\u00fcller, K. R. (2020). The clever hans effect in anomaly detection. arXiv preprint arXiv:2006.10609."},{"issue":"9","key":"2405_CR21","doi-asserted-by":"publisher","first-page":"3354","DOI":"10.1073\/pnas.1309933111","volume":"111","author":"JB Kinney","year":"2014","unstructured":"Kinney, J. B., & Atwal, G. S. (2014). Equitability, mutual information, and the maximal information coefficient. Proceedings of the National Academy of Sciences, 111(9), 3354\u20133359.","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"2405_CR22","doi-asserted-by":"crossref","unstructured":"Lampert, C. H., Nickisch, H., & Harmeling, S. (2009). Learning to detect unseen object classes by between-class attribute transfer. In IEEE computer society conference on computer vision and pattern recognition (CVPR).","DOI":"10.1109\/CVPRW.2009.5206594"},{"key":"2405_CR23","unstructured":"Lample, G., Zeghidour, N., Usunier, N., Bordes, A., Denoyer, L., & Ranzato, M. (2017). Fader networks: Manipulating images by sliding attributes. Advances in Neural Information Processing Systems, 30."},{"key":"2405_CR24","doi-asserted-by":"crossref","unstructured":"Li, W., Cao, Z., Feng, J., Zhou, J., & Lu, J. (2022). Label2label: A language modeling framework for multi-attribute learning. In X. I. I. Part (Ed.), Computer vision-ECCV 2022: 17th European conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings (pp. 562\u2013579). Springer.","DOI":"10.1007\/978-3-031-19775-8_33"},{"key":"2405_CR25","doi-asserted-by":"crossref","unstructured":"Li, D., Chen, X., Zhang, Z., & Huang, K. (2017). Learning deep context-aware features over body and latent parts for person re-identification. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 384\u2013393.","DOI":"10.1109\/CVPR.2017.782"},{"key":"2405_CR26","unstructured":"Li, D., Zhang, Z., Chen, X., Ling, H., & Huang, K. (2016). A richly annotated dataset for pedestrian attribute recognition. arXiv preprint arXiv:1603.07054."},{"key":"2405_CR27","unstructured":"Liu, P., Liu, X., Yan, J., & Shao, J. (2018). Localization guided learning for pedestrian attribute recognition. arXiv preprint arXiv:1808.09102."},{"key":"2405_CR28","doi-asserted-by":"crossref","unstructured":"Liu, Z., Mao, H., Wu, C. Y., Feichtenhofer, C., Darrell, T., & Xie, S. (2022). A convnet for the 2020s. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 11976\u201311986.","DOI":"10.1109\/CVPR52688.2022.01167"},{"key":"2405_CR29","doi-asserted-by":"crossref","unstructured":"Liu, X., Zhao, H., Tian, M., Sheng, L., Shao, J., Yi, S., Yan, J., & Wang, X. (2017). Hydraplus-net: Attentive deep features for pedestrian analysis. In Proceedings of the IEEE international conference on computer vision, pp. 350\u2013359.","DOI":"10.1109\/ICCV.2017.46"},{"issue":"4","key":"2405_CR30","doi-asserted-by":"publisher","first-page":"1575","DOI":"10.1109\/TIP.2018.2878349","volume":"28","author":"D Li","year":"2019","unstructured":"Li, D., Zhang, Z., Chen, X., & Huang, K. (2019). A richly annotated pedestrian dataset for person retrieval in real surveillance scenarios. IEEE Transactions on Image Processing, 28(4), 1575\u20131590.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2405_CR31","doi-asserted-by":"publisher","first-page":"2622","DOI":"10.1109\/TIFS.2023.3268887","volume":"18","author":"D Li","year":"2023","unstructured":"Li, D., Zhang, Z., Shan, C., & Wang, L. (2023). Incremental pedestrian attribute recognition via dual uncertainty-aware pseudo-labeling. IEEE Transactions on Information Forensics and Security, 18, 2622\u20132636.","journal-title":"IEEE Transactions on Information Forensics and Security"},{"key":"2405_CR32","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1109\/TMM.2023.3259686","volume":"26","author":"WQ Lu","year":"2023","unstructured":"Lu, W. Q., Hu, H. M., Yu, J., Zhou, Y., Wang, H., & Li, B. (2023). Orientation-aware pedestrian attribute recognition based on graph convolution network. IEEE Transactions on Multimedia, 26, 28\u201340.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2405_CR33","doi-asserted-by":"crossref","unstructured":"Meng, Q., Zhao, S., Huang, Z., & Zhou, F. (2021). Magface: A universal representation for face recognition and quality assessment. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition (CVPR), pp. 14225\u201314234.","DOI":"10.1109\/CVPR46437.2021.01400"},{"issue":"10","key":"2405_CR34","doi-asserted-by":"publisher","first-page":"2281","DOI":"10.1049\/ipr2.12195","volume":"15","author":"M Moghaddam","year":"2021","unstructured":"Moghaddam, M., Charmi, M., & Hassanpoor, H. (2021). Jointly human semantic parsing and attribute recognition with feature pyramid structure in efficientnets. IET Image Processing, 15(10), 2281\u20132291.","journal-title":"IET Image Processing"},{"issue":"11","key":"2405_CR35","doi-asserted-by":"publisher","first-page":"5847","DOI":"10.1109\/TIT.2010.2068870","volume":"56","author":"XL Nguyen","year":"2010","unstructured":"Nguyen, X. L., & Wainwright, M. J. (2010). Estimating divergence functionals and the likelihood ratio by convex risk minimization. IEEE Transactions on Information Theory, 56(11), 5847\u20135861.","journal-title":"IEEE Transactions on Information Theory"},{"issue":"2","key":"2405_CR36","doi-asserted-by":"publisher","first-page":"143","DOI":"10.1007\/s11263-006-8575-4","volume":"71","author":"Y Ran","year":"2007","unstructured":"Ran, Y., Weiss, I., Zheng, Q., & Davis, L. S. (2007). Pedestrian detection via periodic motion analysis. International Journal of Computer Vision, 71(2), 143\u2013160.","journal-title":"International Journal of Computer Vision"},{"key":"2405_CR37","unstructured":"Ruderman, A., Reid, M., Garc\u00eda-Garc\u00eda, D., & Petterson, J. (2012). Tighter variational representations of f-divergences via restriction to probability measures. arXiv preprint arXiv:1206.4664."},{"key":"2405_CR38","doi-asserted-by":"crossref","unstructured":"Sarafianos, N., & Kakadiaris, I. A. (2018). Deep imbalanced attribute classification using visual attention aggregation. In ECCV.","DOI":"10.1007\/978-3-030-01252-6_42"},{"key":"2405_CR39","unstructured":"Shalev, G., Adi, Y., & Keshet, J. (2018). Out-of-distribution detection using multiple semantic label representations. In Advances in neural information processing systems, pp. 7375\u20137385."},{"key":"2405_CR40","doi-asserted-by":"publisher","first-page":"110194","DOI":"10.1016\/j.patcog.2023.110194","volume":"148","author":"J Shen","year":"2024","unstructured":"Shen, J., Guo, T., Zuo, X., Fan, H., & Yang, W. (2024). Sspnet: Scale and spatial priors guided generalizable and interpretable pedestrian attribute recognition. Pattern Recognition, 148, 110194.","journal-title":"Pattern Recognition"},{"key":"2405_CR41","doi-asserted-by":"crossref","unstructured":"Specker, A., Cormier, M., & Beyerer, J. (2022). Upar: Unified pedestrian attribute recognition and person retrieval. ArXiv: abs\/2209.02522.","DOI":"10.1109\/WACV56688.2023.00104"},{"key":"2405_CR42","doi-asserted-by":"crossref","unstructured":"Tan, Z., Yang, Y., Wan, J., Guo, G., & Li, S. Z. (2020). Relation-aware pedestrian attribute recognition with graph convolutional networks. In Proceedings of the AAAI conference on artificial intelligence, vol. 34, No. (7), pp. 12055\u201312062.","DOI":"10.1609\/aaai.v34i07.6883"},{"key":"2405_CR43","doi-asserted-by":"crossref","unstructured":"Tang, C., Sheng, L., Zhang, Z. X., & Hu, X. (2019). Improving pedestrian attribute recognition with weakly-supervised multi-scale attribute-specific localization. In 2019 IEEE\/CVF international conference on computer vision (ICCV).","DOI":"10.1109\/ICCV.2019.00510"},{"key":"2405_CR44","doi-asserted-by":"crossref","unstructured":"Wang, X., Jin, J., Li, C., Tang, J., Zhang, C., & Wang, W. (2024). Pedestrian attribute recognition via clip based prompt vision-language fusion. IEEE Transactions on Circuits and Systems for Video Technology.","DOI":"10.1109\/TCSVT.2024.3454366"},{"key":"2405_CR45","doi-asserted-by":"crossref","unstructured":"Wang, J., Zhu, X., & Gong, S. (2016). Video semantic clustering with sparse and incomplete tags. In Proceedings of the thirtieth AAAI conference on artificial intelligence (AAAI-16).","DOI":"10.1609\/aaai.v30i1.10454"},{"key":"2405_CR46","doi-asserted-by":"publisher","first-page":"926","DOI":"10.1109\/LSP.2018.2822810","volume":"25","author":"F Wang","year":"2018","unstructured":"Wang, F., Cheng, J., Liu, W., & Liu, H. (2018). Additive margin softmax for face verification. IEEE Signal Processing Letters, 25, 926\u2013930.","journal-title":"IEEE Signal Processing Letters"},{"key":"2405_CR47","doi-asserted-by":"publisher","first-page":"108220","DOI":"10.1016\/j.patcog.2021.108220","volume":"121","author":"X Wang","year":"2022","unstructured":"Wang, X., Zheng, S., Yang, R., Zheng, A., Chen, Z., Tang, J., & Luo, B. (2022). Pedestrian attribute recognition: A survey. Pattern Recognition, 121, 108220.","journal-title":"Pattern Recognition"},{"key":"2405_CR48","doi-asserted-by":"publisher","first-page":"16","DOI":"10.1016\/j.artint.2017.05.002","volume":"250","author":"J Wang","year":"2017","unstructured":"Wang, J., Zhu, X., & Gong, S. (2017). Discovering visual concept structure with sparse and incomplete tags. Artificial Intelligence, 250, 16\u201336.","journal-title":"Artificial Intelligence"},{"key":"2405_CR49","doi-asserted-by":"publisher","first-page":"140","DOI":"10.1016\/j.neucom.2023.02.019","volume":"531","author":"D Weng","year":"2023","unstructured":"Weng, D., Tan, Z., Fang, L., & Guo, G. (2023). Exploring attribute localization and correlation for pedestrian attribute recognition. Neurocomputing, 531, 140\u2013150.","journal-title":"Neurocomputing"},{"key":"2405_CR50","doi-asserted-by":"crossref","unstructured":"Yan, X., Yang, J., Sohn, K., & Lee, H. (2016). Attribute2image: Conditional image generation from visual attributes. In Computer vision\u2013ECCV 2016: 14th European conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part IV 14, Springer, pp. 776\u2013791.","DOI":"10.1007\/978-3-319-46493-0_47"},{"key":"2405_CR51","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11263-021-01499-z","volume":"129","author":"Y Yang","year":"2021","unstructured":"Yang, Y., Tan, Z., Tiwari, P., Pandey, H. M., Wan, J., Lei, Z., & Li, S. Z. (2021). Cascaded split-and-aggregate learning with feature recombination for pedestrian attribute recognition. International Journal of Computer Vision, 129, 1\u201314.","journal-title":"International Journal of Computer Vision"},{"key":"2405_CR52","first-page":"1","volume":"99","author":"X Yongqin","year":"2017","unstructured":"Yongqin, X., Lampert, C. H., Bernt, S., & Zeynep, A. (2017). Zero-shot learning-a comprehensive evaluation of the good, the bad and the ugly. IEEE Transactions on Pattern Analysis and Machine Intelligence, 99, 1\u20131.","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"2405_CR53","doi-asserted-by":"crossref","unstructured":"Yue, Z., Wang, T., Sun, Q., Hua, X. S., & Zhang, H. (2021). Counterfactual zero-shot and open-set visual recognition. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 15404\u201315414.","DOI":"10.1109\/CVPR46437.2021.01515"},{"key":"2405_CR54","doi-asserted-by":"crossref","unstructured":"Yutian, L., Liang, Z., Zhedong, Z., Yu, W., & Zhilan, H. (2019). Improving person re-identification by attribute and identity learning-sciencedirect. Pattern Recognition,95(C), 151\u2013161.","DOI":"10.1016\/j.patcog.2019.06.006"},{"key":"2405_CR55","unstructured":"Zhang, H., Cisse, M., Dauphin, Y. N., & Lopez-Paz, D. (2017). Mixup: Beyond empirical risk minimization. arXiv preprint arXiv:1710.09412."},{"key":"2405_CR56","doi-asserted-by":"publisher","first-page":"1875","DOI":"10.1007\/s11263-021-01461-z","volume":"129","author":"S Zhang","year":"2021","unstructured":"Zhang, S., Chen, D., Yang, J., & Schiele, B. (2021). Guided attention in CNNs for occluded pedestrian detection and re-identification. International Journal of Computer Vision, 129, 1875\u20131892.","journal-title":"International Journal of Computer Vision"},{"key":"2405_CR57","doi-asserted-by":"publisher","first-page":"105708","DOI":"10.1016\/j.engappai.2022.105708","volume":"119","author":"A Zheng","year":"2023","unstructured":"Zheng, A., Wang, H., Wang, J., Huang, H., He, R., & Hussain, A. (2023). Diverse features discovery transformer for pedestrian attribute recognition. Engineering Applications of Artificial Intelligence, 119, 105708.","journal-title":"Engineering Applications of Artificial Intelligence"},{"key":"2405_CR58","doi-asserted-by":"crossref","unstructured":"Zhou, Y. (2022). Rethinking reconstruction autoencoder-based out-of-distribution detection. In Proceedings of the IEEE conference on computer vision and pattern recognition.","DOI":"10.1109\/CVPR52688.2022.00723"},{"key":"2405_CR59","doi-asserted-by":"crossref","unstructured":"Zhou, B., Khosla, A., Lapedriza, A., Oliva, A., & Torralba, A. (2016). Learning deep features for discriminative localization. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2921\u20132929.","DOI":"10.1109\/CVPR.2016.319"},{"key":"2405_CR60","doi-asserted-by":"crossref","unstructured":"Zhu, J., Jin, J., Yang, Z., Wu, X., & Wang, X. (2023). Learning clip guided visual-text fusion transformer for video-based pedestrian attribute recognition. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 2626\u20132629.","DOI":"10.1109\/CVPRW59228.2023.00261"},{"key":"2405_CR61","doi-asserted-by":"crossref","unstructured":"Zhu, X., Xu, C., & Tao, D. (2021). Where and what? examining interpretable disentangled representations. In Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 5861\u20135870.","DOI":"10.1109\/CVPR46437.2021.00580"},{"key":"2405_CR62","doi-asserted-by":"publisher","first-page":"224","DOI":"10.1016\/j.imavis.2016.07.004","volume":"58","author":"J Zhu","year":"2017","unstructured":"Zhu, J., Liao, S., Lei, Z., & Li, S. Z. (2017). Multi-label convolutional neural network based pedestrian attribute classification. Image Vision Computing, 58, 224\u2013229.","journal-title":"Image Vision Computing"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02405-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02405-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02405-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,7]],"date-time":"2025-06-07T06:03:36Z","timestamp":1749276216000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02405-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,18]]},"references-count":62,"journal-issue":{"issue":"7","published-print":{"date-parts":[[2025,7]]}},"alternative-id":["2405"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02405-7","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,18]]},"assertion":[{"value":"30 June 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"20 February 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 March 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"Public code for reproducing the benchmark results in original IJCAI paper is available at","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Code availability"}},{"value":"The authors have no conflicts of interest to declare that are relevant to the content of this article.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflicts of interest"}}]}}