{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,15]],"date-time":"2026-04-15T19:27:14Z","timestamp":1776281234593,"version":"3.50.1"},"reference-count":116,"publisher":"Springer Science and Business Media LLC","issue":"11","license":[{"start":{"date-parts":[[2025,8,11]],"date-time":"2025-08-11T00:00:00Z","timestamp":1754870400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,8,11]],"date-time":"2025-08-11T00:00:00Z","timestamp":1754870400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Int J Comput Vis"],"published-print":{"date-parts":[[2025,11]]},"DOI":"10.1007\/s11263-025-02548-7","type":"journal-article","created":{"date-parts":[[2025,8,11]],"date-time":"2025-08-11T03:13:59Z","timestamp":1754882039000},"page":"7748-7771","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Clothing Purification with Causality Meets Vision-Language Pretraining Models"],"prefix":"10.1007","volume":"133","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8190-1438","authenticated-orcid":false,"given":"Zhengwei","family":"Yang","sequence":"first","affiliation":[]},{"given":"Huilin","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Nan","family":"Lei","sequence":"additional","affiliation":[]},{"given":"Basura","family":"Fernando","sequence":"additional","affiliation":[]},{"given":"Zheng","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2025,8,11]]},"reference":[{"key":"2548_CR1","doi-asserted-by":"crossref","unstructured":"Bai, Y., Cao, M., & Gao, D., et\u00a0al. (2023) Rasa: Relation and sensitivity aware representation learning for text-based person search. In Proceeding of the International Joint Conferences on Artificial Intelligence, pp 555\u2013563","DOI":"10.24963\/ijcai.2023\/62"},{"key":"2548_CR2","doi-asserted-by":"crossref","unstructured":"Bannur, S., Hyland, S.L., & Liu, Q., et\u00a0al. (2023). Learning to exploit temporal structure for biomedical vision-language processing. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 15016\u201315027.","DOI":"10.1109\/CVPR52729.2023.01442"},{"key":"2548_CR3","doi-asserted-by":"crossref","unstructured":"Bansal, V., Foresti, G.L., & Martinel, N. (2022). Cloth-changing person re-identification with self-attention. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision Workshops, pp 602\u2013610.","DOI":"10.1109\/WACVW54805.2022.00066"},{"key":"2548_CR4","doi-asserted-by":"crossref","unstructured":"Barbosa, I.B., Cristani, M., & Bue, A.D., et\u00a0al (2012). Re-identification with RGB-D sensors. In European conference on computer vision Workshops, Springer, pp 433\u2013442","DOI":"10.1007\/978-3-642-33863-2_43"},{"key":"2548_CR5","doi-asserted-by":"publisher","first-page":"2352","DOI":"10.1109\/TIP.2022.3141868","volume":"31","author":"C Chen","year":"2022","unstructured":"Chen, C., Ye, M., Qi, M., et al. (2022). Structure-aware positional transformer for visible-infrared person re-identification. IEEE Transactions on Image Processing, 31, 2352\u20132364.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2548_CR6","doi-asserted-by":"crossref","unstructured":"Chen, C., Ye, M., & Jiang, D. (2023a). Towards modality-agnostic person re-identification with descriptive query. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 15128\u201315137.","DOI":"10.1109\/CVPR52729.2023.01452"},{"key":"2548_CR7","doi-asserted-by":"crossref","unstructured":"Chen, J., Gao, Z., & Wu, X., et\u00a0al. (2023b). Meta-causal learning for single domain generalization. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 7683\u20137692.","DOI":"10.1109\/CVPR52729.2023.00742"},{"key":"2548_CR8","doi-asserted-by":"crossref","unstructured":"Chen, J., Jiang, X., & Wang, F., et\u00a0al. (2021). Learning 3d shape feature for texture-insensitive person re-identification. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 8146\u20138155.","DOI":"10.1109\/CVPR46437.2021.00805"},{"key":"2548_CR9","doi-asserted-by":"crossref","unstructured":"Chen, T., Ding, S., & Xie, J., et\u00a0al. (2019). Abd-net: Attentive but diverse person re-identification. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 8351\u20138361","DOI":"10.1109\/ICCV.2019.00844"},{"key":"2548_CR10","doi-asserted-by":"crossref","unstructured":"Chen, Z., Li, G., & Wan, X. (2022b). Align, reason and learn: Enhancing medical vision-and-language pre-training with knowledge. In Proceedings of the ACM International Conference on Multimedia, pp 5152\u20135161.","DOI":"10.1145\/3503161.3547948"},{"key":"2548_CR11","doi-asserted-by":"crossref","unstructured":"Ci, Y., Wang, Y., & Chen, M., et\u00a0al. (2023). Unihcp: A unified model for human-centric perceptions. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 17840\u201317852.","DOI":"10.1109\/CVPR52729.2023.01711"},{"issue":"8","key":"2548_CR12","doi-asserted-by":"publisher","first-page":"4415","DOI":"10.1109\/TCSVT.2023.3241988","volume":"33","author":"Z Cui","year":"2023","unstructured":"Cui, Z., Zhou, J., Peng, Y., et al. (2023). Dcr-reid: Deep component reconstruction for cloth-changing person re-identification. IEEE Transactions on Circuits and Systems for Video Technology, 33(8), 4415\u20134428.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"2548_CR13","doi-asserted-by":"crossref","unstructured":"Dash, S., Balasubramanian, V.N., & Sharma, A. (2022). Evaluating and mitigating bias in image classifiers: A causal perspective using counterfactuals. In Proc. IEEE\/CVF Winter Conference on Appl. Comput. Vis., pp 3879\u20133888","DOI":"10.1109\/WACV51458.2022.00393"},{"key":"2548_CR14","first-page":"32942","volume":"35","author":"ZY Dou","year":"2022","unstructured":"Dou, Z. Y., Kamath, A., Gan, Z., et al. (2022). Coarse-to-fine vision-language pre-training with fusion in the backbone. Advances in Neural Information Processing Systems, 35, 32942\u201332956.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2548_CR15","doi-asserted-by":"crossref","unstructured":"Du, Y., Wei, F., & Zhang, Z., et\u00a0al. (2022). Learning to prompt for open-vocabulary object detection with vision-language model. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 14084\u201314093.","DOI":"10.1109\/CVPR52688.2022.01369"},{"key":"2548_CR16","doi-asserted-by":"crossref","unstructured":"Fenton, N.E., Neil, M., & Constantinou, A.C. (2020). The book of why: The new science of cause and effect, judea pearl, dana mackenzie. basic books (2018). Artificial Intelligence 284:103286.","DOI":"10.1016\/j.artint.2020.103286"},{"key":"2548_CR17","doi-asserted-by":"crossref","unstructured":"Fu, Y., Wei, Y., & Zhou, Y., et\u00a0al. (2019). Horizontal pyramid matching for person re-identification. In. Proceedings of the AAAI Conference on Artificial Intelligence, pp 8295\u20138302.","DOI":"10.1609\/aaai.v33i01.33018295"},{"key":"2548_CR18","doi-asserted-by":"crossref","unstructured":"Gao, Z., Wei, H., & Guan, W., et\u00a0al. (2022). Multigranular visual-semantic embedding for cloth-changing person re-identification. In Proceedings of the ACM International Conference on Multimedia, pp 3703\u20133711.","DOI":"10.1145\/3503161.3547884"},{"key":"2548_CR19","doi-asserted-by":"crossref","unstructured":"Gu, X., Chang, H., & Ma, B., et\u00a0al. (2022). Clothes-changing person re-identification with RGB modality only. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 1050\u20131059.","DOI":"10.1109\/CVPR52688.2022.00113"},{"issue":"4","key":"2548_CR20","first-page":"75:1","volume":"53","author":"R Guo","year":"2020","unstructured":"Guo, R., Cheng, L., Li, J., et al. (2020). A survey of learning causality with data: Problems and methods. ACM Computing Surveys, 53(4), 75:1-75:37.","journal-title":"ACM Computing Surveys"},{"key":"2548_CR21","unstructured":"Han, K., Gong, S., & Huang, Y., et\u00a0al. (2023). Clothing-change feature augmentation for person re-identification. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 22066\u201322075."},{"key":"2548_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., & Ren, S., et\u00a0al. (2016). Deep residual learning for image recognition. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 770\u2013778.","DOI":"10.1109\/CVPR.2016.90"},{"key":"2548_CR23","doi-asserted-by":"publisher","first-page":"120","DOI":"10.1109\/TIFS.2023.3318956","volume":"19","author":"S He","year":"2023","unstructured":"He, S., Chen, W., Wang, K., et al. (2023). Region generation and assessment network for occluded person re-identification. IEEE Transactions on Information Forensics and Security, 19, 120\u2013132.","journal-title":"IEEE Transactions on Information Forensics and Security"},{"key":"2548_CR24","doi-asserted-by":"crossref","unstructured":"He, S., Luo, H., & Wang, P., et\u00a0al. (2021). Transreid: Transformer-based object re-identification. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 15013\u201315022.","DOI":"10.1109\/ICCV48922.2021.01474"},{"key":"2548_CR25","doi-asserted-by":"crossref","unstructured":"Hong, P., Wu, T., & Wu, A., et\u00a0al. (2021). Fine-grained shape-appearance mutual learning for cloth-changing person re-identification. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 10513\u201310522.","DOI":"10.1109\/CVPR46437.2021.01037"},{"key":"2548_CR26","doi-asserted-by":"crossref","unstructured":"Hou, R., Ma, B., & Chang, H., et\u00a0al. (2019). Interaction-and-aggregation network for person re-identification. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 9317\u20139326.","DOI":"10.1109\/CVPR.2019.00954"},{"key":"2548_CR27","doi-asserted-by":"crossref","unstructured":"Huang, H., Li, D., & Zhang, Z., et\u00a0al. (2018). Adversarially occluded samples for person re-identification. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp 5098\u20135107.","DOI":"10.1109\/CVPR.2018.00535"},{"key":"2548_CR28","doi-asserted-by":"crossref","unstructured":"Huang, Y., Wu, Q., & Xu, J., et\u00a0al. (2021). Clothing status awareness for long-term person re-identification. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 11875\u201311884.","DOI":"10.1109\/ICCV48922.2021.01168"},{"key":"2548_CR29","doi-asserted-by":"publisher","first-page":"4227","DOI":"10.1109\/TIP.2022.3183469","volume":"31","author":"X Jia","year":"2022","unstructured":"Jia, X., Zhong, X., Ye, M., et al. (2022). Complementary data augmentation for cloth-changing person re-identification. IEEE Transactions on Image Processing, 31, 4227\u20134239.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2548_CR30","doi-asserted-by":"crossref","unstructured":"Jin, X., He, T., & Zheng, K., et\u00a0al. (2022). Cloth-changing person re-identification from A single image with gait prediction and regularization. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 14258\u201314267.","DOI":"10.1109\/CVPR52688.2022.01388"},{"key":"2548_CR31","unstructured":"Kingma, D.P., & Ba, J. (2015). Adam: A method for stochastic optimization. In Proceedings of the International Conference on Learning Representations, pp 1\u201315."},{"key":"2548_CR32","doi-asserted-by":"crossref","unstructured":"Kirillov, A., Mintun, E., & Ravi, N., et\u00a0al. (2023). Segment anything. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 4015\u20134026.","DOI":"10.1109\/ICCV51070.2023.00371"},{"issue":"3","key":"2548_CR33","first-page":"253","volume":"6","author":"K Kuang","year":"2020","unstructured":"Kuang, K., Li, L., Geng, Z., et al. (2020). Causal inference. Eng, 6(3), 253\u2013263.","journal-title":"Causal inference. Eng"},{"key":"2548_CR34","doi-asserted-by":"publisher","first-page":"334","DOI":"10.1109\/LSP.2023.3262447","volume":"30","author":"HJ Kweon","year":"2023","unstructured":"Kweon, H. J., & Cho, D. (2023). Cloth-changing person re-identification with noisy patch filtering. IEEE Signal Processing Letters, 30, 334\u2013338.","journal-title":"IEEE Signal Processing Letters"},{"key":"2548_CR35","unstructured":"Li, J., Li, D., & Savarese, S., et\u00a0al. (2023a). BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In International Conference on Machine Learning, PMLR, pp 19730\u201319742."},{"key":"2548_CR36","unstructured":"Li, J., Li, D., & Xiong, C., et\u00a0al. (2022a). BLIP: bootstrapping language-image pre-training for unified vision-language understanding and generation. In International Conference on Machine Learning, PMLR, pp 12888\u201312900."},{"key":"2548_CR37","doi-asserted-by":"crossref","unstructured":"Li, J., Niu, L., & Zhang, L. (2023b). Knowledge proxy intervention for deconfounded video question answering. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 2782\u20132793.","DOI":"10.1109\/ICCV51070.2023.00261"},{"issue":"6","key":"2548_CR38","doi-asserted-by":"publisher","first-page":"3260","DOI":"10.1109\/TPAMI.2020.3048039","volume":"44","author":"P Li","year":"2020","unstructured":"Li, P., Xu, Y., Wei, Y., et al. (2020). Self-correction for human parsing. IEEE Transactions on Pattern Analysis & Machine Intelligence, 44(6), 3260\u20133271.","journal-title":"IEEE Transactions on Pattern Analysis & Machine Intelligence"},{"issue":"4","key":"2548_CR39","doi-asserted-by":"publisher","first-page":"1839","DOI":"10.1109\/TCSVT.2022.3216769","volume":"33","author":"S Li","year":"2023","unstructured":"Li, S., Chen, H., Yu, S., et al. (2023). Cocas+: Large-scale clothes-changing person re-identification with clothes templates. IEEE Transactions on Circuits and Systems for Video Technology, 33(4), 1839\u20131853.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"2548_CR40","doi-asserted-by":"crossref","unstructured":"Li, S., Sun, L., & Li, Q. (2023d). Clip-reid: exploiting vision-language model for image re-identification without concrete text labels. In Proceedings of the AAAI Conference on Artificial Intelligence, pp 1405\u20131413.","DOI":"10.1609\/aaai.v37i1.25225"},{"key":"2548_CR41","doi-asserted-by":"crossref","unstructured":"Li, W., Hou, S., & Zhang, C., et\u00a0al. (2023e). An in-depth exploration of person re-identification and gait recognition in cloth-changing conditions. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 13824\u201313833.","DOI":"10.1109\/CVPR52729.2023.01328"},{"key":"2548_CR42","doi-asserted-by":"crossref","unstructured":"Li, X., Liu, B., & Lu, Y., et\u00a0al. (2022b). Cloth-aware center cluster loss for cloth-changing person re-identification. In Proceedings of the Chinese Conference on Pattern Recognition and Computer Vision, Springer, pp 527\u2013539.","DOI":"10.1007\/978-3-031-18907-4_41"},{"key":"2548_CR43","doi-asserted-by":"crossref","unstructured":"Li, X., Lu, Y., & Liu ,B., et\u00a0al. (2022c). Counterfactual intervention feature transfer for visible-infrared person re-identification. In European conference on computer vision, Springer, pp 381\u2013398.","DOI":"10.1007\/978-3-031-19809-0_22"},{"key":"2548_CR44","unstructured":"Li, X., Lu, Y., & Liu, B., et\u00a0al. (2023f). Clothes-invariant feature learning by causal intervention for clothes-changing person re-identification. CoRR abs\/2305.06145."},{"key":"2548_CR45","doi-asserted-by":"crossref","unstructured":"Li, Y.J., Weng, X., & Kitani, K.M. (2021). Learning shape representations for person re-identification under clothing change. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 2432\u20132441.","DOI":"10.1109\/WACV48630.2021.00248"},{"key":"2548_CR46","doi-asserted-by":"crossref","unstructured":"Liu, F., Kim, M., & Gu, Z., et\u00a0al.(2023a) Learning clothing and pose invariant 3d shape representation for long-term person re-identification. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 19617\u201319626.","DOI":"10.1109\/ICCV51070.2023.01797"},{"key":"2548_CR47","doi-asserted-by":"publisher","first-page":"5075","DOI":"10.1109\/TIP.2023.3310307","volume":"32","author":"F Liu","year":"2023","unstructured":"Liu, F., Ye, M., & Du, B. (2023). Dual level adaptive weighting for cloth-changing person re-identification. IEEE Transactions on Image Processing, 32, 5075\u20135086.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2548_CR48","doi-asserted-by":"crossref","unstructured":"Liu, J., Shen, Z., & Cui, P., et\u00a0al. (2021). Stable adversarial learning under distributional shifts. In Proceedings of the AAAI Conference on Artificial Intelligence, pp 8662\u20138670.","DOI":"10.1609\/aaai.v35i10.17050"},{"key":"2548_CR49","doi-asserted-by":"crossref","unstructured":"Liu R, Liu H, Li G, et\u00a0al (2022) Contextual debiasing for visual recognition with causal mechanisms. In: Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 12745\u201312755","DOI":"10.1109\/CVPR52688.2022.01242"},{"key":"2548_CR50","doi-asserted-by":"crossref","unstructured":"Liu, S., Zeng, Z., & Ren, T., et\u00a0al. (2023c). Grounding DINO: marrying DINO with grounded pre-training for open-set object detection. CoRR abs\/2303.05499.","DOI":"10.1007\/978-3-031-72970-6_3"},{"key":"2548_CR51","unstructured":"Liu, Y., Ge, H., & Wang, Z., et\u00a0al. (2023d). Clothes-changing person re-identification via universal framework with association and forgetting learning. IEEE Transactions on Multimedia pp 1\u201314."},{"key":"2548_CR52","doi-asserted-by":"publisher","first-page":"406","DOI":"10.1109\/TMM.2023.3266066","volume":"26","author":"A Lu","year":"2024","unstructured":"Lu, A., Zhang, Z., Huang, Y., et al. (2024). Illumination distillation framework for nighttime person re-identification and a new benchmark. IEEE Transactions on Multimedia, 26, 406\u2013419.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2548_CR53","doi-asserted-by":"crossref","unstructured":"Lv, F., Liang, J., & Li, S., et\u00a0al. (2022). Causality inspired representation learning for domain generalization. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 8046\u20138056.","DOI":"10.1109\/CVPR52688.2022.00788"},{"key":"2548_CR54","unstructured":"Mahajan, D., Tople, S., & Sharma, A. (2021). Domain generalization using causal matching. In International Conference on Machine Learning, PMLR, pp 7313\u20137324."},{"key":"2548_CR55","doi-asserted-by":"crossref","unstructured":"Mao, C., Xia, K., & Wang, J., et\u00a0al. (2022). Causal transportability for visual recognition. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 7521\u20137531.","DOI":"10.1109\/CVPR52688.2022.00737"},{"key":"2548_CR56","doi-asserted-by":"crossref","unstructured":"Miao, J., Chen, C., & Liu, F., et\u00a0al. (2023). Caussl: Causality-inspired semi-supervised learning for medical image segmentation. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 21426\u201321437.","DOI":"10.1109\/ICCV51070.2023.01959"},{"key":"2548_CR57","first-page":"35087","volume":"35","author":"Y Ming","year":"2022","unstructured":"Ming, Y., Cai, Z., Gu, J., et al. (2022). Delving into out-of-distribution detection with vision-language representations. Advances in Neural Information Processing Systems, 35, 35087\u201335102.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"2548_CR58","doi-asserted-by":"crossref","unstructured":"Nguyen, V.D., Khaldi, K., & Nguyen, D., et\u00a0al. (2024). Contrastive viewpoint-aware shape learning for long-term person re-identification. In Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp 1041\u20131049.","DOI":"10.1109\/WACV57701.2024.00108"},{"key":"2548_CR59","doi-asserted-by":"crossref","unstructured":"Niu, Y., Tang, K., & Zhang, H., et\u00a0al. (2021). Counterfactual VQA: A cause-effect look at language bias. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 12700\u201312710.","DOI":"10.1109\/CVPR46437.2021.01251"},{"issue":"4","key":"2548_CR60","doi-asserted-by":"publisher","first-page":"1095","DOI":"10.1109\/TMI.2022.3224067","volume":"42","author":"C Ouyang","year":"2022","unstructured":"Ouyang, C., Chen, C., Li, S., et al. (2022). Causality-inspired single-source domain generalization for medical image segmentation. IEEE Transactions on Medical Imaging, 42(4), 1095\u20131106.","journal-title":"IEEE Transactions on Medical Imaging"},{"key":"2548_CR61","unstructured":"Pearl, J. (2010). Causal inference. In JMLR Causality: Objectives and Assessment, pp 39\u201358."},{"key":"2548_CR62","unstructured":"Pearl, J. (2013). Direct and indirect effects. arXiv:1301.2300."},{"key":"2548_CR63","doi-asserted-by":"crossref","unstructured":"Peng, C., Wang, B., & Liu, D., et\u00a0al. (2024). Masked attribute description embedding for cloth-changing person re-identification. IEEE Transactions on Multimedia.","DOI":"10.1109\/TMM.2024.3521730"},{"key":"2548_CR64","doi-asserted-by":"crossref","unstructured":"Qian, X., Wang, W., & Zhang, L., et\u00a0al. (2020). Long-term cloth-changing person re-identification. In Proceedings of the Asian Conference on Computer Vision, pp 71\u201388.","DOI":"10.1007\/978-3-030-69535-4_5"},{"key":"2548_CR65","unstructured":"Radford, A., Kim, J.W., & Hallacy, C., et\u00a0al. (2021). Learning transferable visual models from natural language supervision. In International Conference on Machine Learning, PMLR, vol 139. PMLR, pp 8748\u20138763."},{"key":"2548_CR66","doi-asserted-by":"crossref","unstructured":"Rao, Y., Chen, G., & Lu, J., et\u00a0al. (2021). Counterfactual attention learning for fine-grained visual categorization and re-identification. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 1005\u20131014.","DOI":"10.1109\/ICCV48922.2021.00106"},{"key":"2548_CR67","unstructured":"Ren, T., Liu, S., & Zeng, A., et\u00a0al. (2024). Grounded sam: Assembling open-world models for diverse visual tasks. arXiv preprint arXiv:2401.14159."},{"issue":"2","key":"2548_CR68","doi-asserted-by":"publisher","first-page":"336","DOI":"10.1007\/s11263-019-01228-7","volume":"128","author":"RR Selvaraju","year":"2020","unstructured":"Selvaraju, R. R., Cogswell, M., Das, A., et al. (2020). Grad-CAM: Visual explanations from deep networks via gradient-based localization. International Journal of Computer Vision, 128(2), 336\u2013359.","journal-title":"International Journal of Computer Vision"},{"key":"2548_CR69","doi-asserted-by":"crossref","unstructured":"Shao, Z., Zhang, X., & Ding, C., et\u00a0al. (2023). Unified pre-training with pseudo texts for text-to-image person re-identification. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 11174\u201311184.","DOI":"10.1109\/ICCV51070.2023.01026"},{"key":"2548_CR70","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2021.104335","volume":"117","author":"W Shi","year":"2022","unstructured":"Shi, W., Liu, H., & Liu, M. (2022). IRANet: identity-relevance aware representation for cloth-changing person re-identification. Image and vision computing, 117, Article 104335.","journal-title":"Image and vision computing"},{"key":"2548_CR71","doi-asserted-by":"publisher","first-page":"1365","DOI":"10.1109\/LSP.2021.3091924","volume":"28","author":"X Shu","year":"2021","unstructured":"Shu, X., Li, G., Wang, X., et al. (2021). Semantic-guided pixel sampling for cloth-changing person re-identification. IEEE Signal Processing Letters, 28, 1365\u20131369.","journal-title":"IEEE Signal Processing Letters"},{"issue":"7","key":"2548_CR72","doi-asserted-by":"publisher","first-page":"4390","DOI":"10.1109\/TCSVT.2021.3128214","volume":"32","author":"X Shu","year":"2021","unstructured":"Shu, X., Wang, X., Zang, X., et al. (2021). Large-scale spatio-temporal person re-identification: Algorithms and benchmark. IEEE Transactions on Circuits and Systems for Video Technology, 32(7), 4390\u20134403.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"2548_CR73","doi-asserted-by":"crossref","unstructured":"Siddiqui, N., Croitoru, F.A., & Nayak, G.K., et\u00a0al. (2024). Dlcr: A generative data expansion framework via diffusion for clothes-changing person re-id. arXiv preprint arXiv:2411.07205.","DOI":"10.1109\/WACV61041.2025.00164"},{"key":"2548_CR74","doi-asserted-by":"crossref","unstructured":"Singh, K.K., & Lee, Y.J. (2017). Hide-and-seek: Forcing a network to be meticulous for weakly-supervised object and action localization. In Proceedings of the IEEE\/CVF International Conference on Computer Vision. IEEE, pp 3544\u20133553.","DOI":"10.1109\/ICCV.2017.381"},{"key":"2548_CR75","doi-asserted-by":"crossref","unstructured":"Somers, V., De\u00a0Vleeschouwer, C., & Alahi, A. (2023). Body part-based representation learning for occluded person re-identification. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 1613\u20131623.","DOI":"10.1109\/WACV56688.2023.00166"},{"key":"2548_CR76","doi-asserted-by":"crossref","unstructured":"Song, C., Huang, Y., & Ouyang, W., et\u00a0al. (2018). Mask-guided contrastive attention model for person re-identification. In Proceedings of the IEEE conference on computer vision and pattern recognition, pp 1179\u20131188.","DOI":"10.1109\/CVPR.2018.00129"},{"key":"2548_CR77","doi-asserted-by":"crossref","unstructured":"Song, S., Wan, J., & Yang, Z., et\u00a0al. (2022). Vision-language pre-training for boosting scene text detectors. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 15681\u201315691.","DOI":"10.1109\/CVPR52688.2022.01523"},{"issue":"10","key":"2548_CR78","doi-asserted-by":"publisher","first-page":"12562","DOI":"10.1109\/TPAMI.2023.3285009","volume":"45","author":"S Sun","year":"2023","unstructured":"Sun, S., Zhi, S., Liao, Q., et al. (2023). Unbiased scene graph generation via two-stage causal modeling. IEEE Transactions on Pattern Analysis & Machine Intelligence, 45(10), 12562\u201312580.","journal-title":"IEEE Transactions on Pattern Analysis & Machine Intelligence"},{"key":"2548_CR79","doi-asserted-by":"crossref","unstructured":"Sun, Y., Zheng, L., & Yang, Y., et\u00a0al. (2018). Beyond part models: Person retrieval with refined part pooling (and A strong convolutional baseline). In European conference on computer vision, Springer, pp 501\u2013518.","DOI":"10.1007\/978-3-030-01225-0_30"},{"key":"2548_CR80","doi-asserted-by":"crossref","unstructured":"Tang, K., Niu, Y., & Huang, J., et\u00a0al. (2020). Unbiased scene graph generation from biased training. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 3713\u20133722.","DOI":"10.1109\/CVPR42600.2020.00377"},{"key":"2548_CR81","doi-asserted-by":"crossref","unstructured":"Tang, S., Chen, C., & Xie, Q., et\u00a0al. (2023). Humanbench: Towards general human-centric perception with projector assisted pretraining. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 21970\u201321982.","DOI":"10.1109\/CVPR52729.2023.02104"},{"key":"2548_CR82","doi-asserted-by":"crossref","unstructured":"Wan, F., Wu, Y., & Qian, X., et\u00a0al. (2020). When person re-identification meets changing clothes. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference Workshop, pp 830\u2013831.","DOI":"10.1109\/CVPRW50498.2020.00423"},{"key":"2548_CR83","unstructured":"Wang, P., Bai, S., & Tan, S., et\u00a0al. (2024a). Qwen2-vl: Enhancing vision-language model\u2019s perception of the world at any resolution. CoRR abs\/2409.12191."},{"key":"2548_CR84","doi-asserted-by":"crossref","unstructured":"Wang, Q., Qian, X., & Li, B., et\u00a0al. (2024b). Exploring fine-grained representation and recomposition for cloth-changing person re-identification. IEEE Transactions on Information Forensics and Security.","DOI":"10.1109\/TIFS.2024.3414667"},{"key":"2548_CR85","doi-asserted-by":"publisher","first-page":"155","DOI":"10.1016\/j.neucom.2022.11.009","volume":"518","author":"J Wu","year":"2023","unstructured":"Wu, J., Yang, Y., Lei, Z., et al. (2023). Camera-aware representation learning for person re-identification. Neurocomputing, 518, 155\u2013164.","journal-title":"Neurocomputing"},{"key":"2548_CR86","doi-asserted-by":"publisher","first-page":"4803","DOI":"10.1109\/TIP.2022.3186746","volume":"31","author":"L Wu","year":"2022","unstructured":"Wu, L., Liu, D., Zhang, W., et al. (2022). Pseudo-pair based self-similarity learning for unsupervised person re-identification. IEEE Transactions on Image Processing, 31, 4803\u20134816.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2548_CR87","doi-asserted-by":"crossref","unstructured":"Wu, Y., Wei, P., & Lin, L. (2023b). Scene graph to image synthesis via knowledge consensus. In Proceedings of the AAAI Conference on Artificial Intelligence, pp 2856\u20132865.","DOI":"10.1609\/aaai.v37i3.25387"},{"key":"2548_CR88","doi-asserted-by":"crossref","unstructured":"Xie, D., Liu, L., & Zhang, S., et\u00a0al. (2023). A unified multi-modal structure for retrieving tracked vehicles through natural language descriptions. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 5418\u20135426.","DOI":"10.1109\/CVPRW59228.2023.00572"},{"key":"2548_CR89","doi-asserted-by":"crossref","unstructured":"Xue, D., Qian, S., & Xu, C. (2023). Variational causal inference network for explanatory visual question answering. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 2515\u20132525.","DOI":"10.1109\/ICCV51070.2023.00238"},{"key":"2548_CR90","doi-asserted-by":"crossref","unstructured":"Yan, B., & Pei, M. (2022). Clinical-bert: Vision-language pre-training for radiograph diagnosis and reports generation. In Proceedings of the AAAI Conference on Artificial Intelligence, pp 2982\u20132990.","DOI":"10.1609\/aaai.v36i3.20204"},{"key":"2548_CR91","doi-asserted-by":"publisher","first-page":"6032","DOI":"10.1109\/TIP.2023.3327924","volume":"32","author":"S Yan","year":"2023","unstructured":"Yan, S., Dong, N., Zhang, L., et al. (2023). Clip-driven fine-grained text-image person re-identification. IEEE Transactions on Image Processing, 32, 6032\u20136046.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2548_CR92","doi-asserted-by":"crossref","unstructured":"Yan, Y., Yu, H., & Li, S., et\u00a0al. (2022). Weakening the influence of clothing: Universal clothing attribute disentanglement for person re-identification. In Proceeding of the International Joint Conferences on Artificial Intelligence, pp 1523\u20131529.","DOI":"10.24963\/ijcai.2022\/212"},{"key":"2548_CR93","doi-asserted-by":"publisher","first-page":"3386","DOI":"10.1109\/TMM.2022.3160057","volume":"25","author":"K Yang","year":"2023","unstructured":"Yang, K., & Tian, X. (2023). Domain-class correlation decomposition for generalizable person re-identification. IEEE Transactions on Multimedia, 25, 3386\u20133396.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2548_CR94","doi-asserted-by":"crossref","unstructured":"Yang, M., Liu, F., & Chen, Z., et\u00a0al. (2021a). CausalVAE: disentangled representation learning via neural structural causal models. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 9593\u20139602.","DOI":"10.1109\/CVPR46437.2021.00947"},{"issue":"6","key":"2548_CR95","doi-asserted-by":"publisher","first-page":"2029","DOI":"10.1109\/TPAMI.2019.2960509","volume":"43","author":"Q Yang","year":"2021","unstructured":"Yang, Q., Wu, A., & Zheng, W. (2021). Person re-identification by contour sketch under moderate clothing change. IEEE Transactions on Pattern Analysis & Machine Intelligence, 43(6), 2029\u20132046.","journal-title":"IEEE Transactions on Pattern Analysis & Machine Intelligence"},{"key":"2548_CR96","doi-asserted-by":"publisher","first-page":"6412","DOI":"10.1109\/TIP.2022.3207024","volume":"31","author":"S Yang","year":"2022","unstructured":"Yang, S., Kang, B., & Lee, Y. (2022). Sampling agnostic feature representation for long-term person re-identification. IEEE Transactions on Image Processing, 31, 6412\u20136423.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2548_CR97","doi-asserted-by":"crossref","unstructured":"Yang, Z., Zhong, X., & Liu, H., et\u00a0al. (2022b). Attentive decoupling network for cloth-changing re-identification. In Proceedings of the IEEE International conference on Multimedia and Expo, pp 1\u20136.","DOI":"10.1109\/ICME52920.2022.9859851"},{"key":"2548_CR98","doi-asserted-by":"crossref","unstructured":"Yang, Z., Lin, M., & Zhong, X., et\u00a0al. (2023a). Good is bad: Causality inspired cloth-debiasing for cloth-changing person re-identification. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 1472\u20131481.","DOI":"10.1109\/CVPR52729.2023.00148"},{"key":"2548_CR99","doi-asserted-by":"publisher","first-page":"2985","DOI":"10.1109\/TIP.2023.3277389","volume":"32","author":"Z Yang","year":"2023","unstructured":"Yang, Z., Zhong, X., Zhong, Z., et al. (2023). Win-win by competition: Auxiliary-free cloth-changing person re-identification. IEEE Transactions on Image Processing, 32, 2985\u20132999.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2548_CR100","unstructured":"Yao, Y., Yu, T., & Zhang, A., et\u00a0al. (2024). Minicpm-v: A GPT-4V level MLLM on your phone. CoRR arxiv:abs\/2408.01800."},{"issue":"6","key":"2548_CR101","doi-asserted-by":"publisher","first-page":"2872","DOI":"10.1109\/TPAMI.2021.3054775","volume":"44","author":"M Ye","year":"2021","unstructured":"Ye, M., Shen, J., Lin, G., et al. (2021). Deep learning for person re-identification: A survey and outlook. IEEE Transactions on Pattern Analysis & Machine Intelligence, 44(6), 2872\u20132893.","journal-title":"IEEE Transactions on Pattern Analysis & Machine Intelligence"},{"key":"2548_CR102","doi-asserted-by":"crossref","unstructured":"Yu, H., Liu, B., & Lu, Y., et\u00a0al. (2022a). Multi-view geometry distillation for cloth-changing person reid. In Proceedings of the Chinese Conference on Pattern Recognition and Computer Vision, Springer, pp. 29\u201341.","DOI":"10.1007\/978-3-031-18907-4_3"},{"key":"2548_CR103","doi-asserted-by":"crossref","unstructured":"Yu, S., Li, S., & Chen, D., et\u00a0al. (2020). COCAS: A large-scale clothes changing person dataset for re-identification. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 3397\u20133406.","DOI":"10.1109\/CVPR42600.2020.00346"},{"key":"2548_CR104","doi-asserted-by":"crossref","unstructured":"Yu, Y., Zhan, F., & Wu, R., et\u00a0al. (2022b). Towards counterfactual image manipulation via clip. In Proceedings of the ACM International Conference on Multimedia, pp 3637\u20133645.","DOI":"10.1145\/3503161.3547935"},{"key":"2548_CR105","doi-asserted-by":"crossref","unstructured":"Zang, C., Wang, H., & Pei, M., et\u00a0al. (2023). Discovering the real association: Multimodal causal reasoning in video question answering. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 19027\u201319036.","DOI":"10.1109\/CVPR52729.2023.01824"},{"issue":"10","key":"2548_CR106","doi-asserted-by":"publisher","first-page":"6766","DOI":"10.1109\/TCSVT.2022.3169422","volume":"32","author":"G Zhang","year":"2022","unstructured":"Zhang, G., Luo, Z., Chen, Y., et al. (2022). Illumination unification for person re-identification. IEEE Transactions on Circuits and Systems for Video Technology, 32(10), 6766\u20136777.","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"2548_CR107","doi-asserted-by":"publisher","first-page":"4555","DOI":"10.1109\/TIP.2023.3279673","volume":"32","author":"G Zhang","year":"2023","unstructured":"Zhang, G., Liu, J., Chen, Y., et al. (2023). Multi-biometric unified network for cloth-changing person re-identification. IEEE Transactions on Image Processing, 32, 4555\u20134566.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2548_CR108","doi-asserted-by":"publisher","first-page":"4706","DOI":"10.1109\/TMM.2023.3325965","volume":"26","author":"K Zhang","year":"2024","unstructured":"Zhang, K., Yang, Y., Yu, J., et al. (2024). Multi-task paired masking with alignment modeling for medical vision-language pre-training. IEEE Transactions on Multimedia, 26, 4706\u20134721.","journal-title":"IEEE Transactions on Multimedia"},{"key":"2548_CR109","doi-asserted-by":"crossref","unstructured":"Zhang, X., Cui, P., & Xu, R., et\u00a0al. (2021). Deep stable learning for out-of-distribution generalization. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 5372\u20135382.","DOI":"10.1109\/CVPR46437.2021.00533"},{"key":"2548_CR110","doi-asserted-by":"publisher","first-page":"509","DOI":"10.1109\/TIP.2022.3229621","volume":"32","author":"YF Zhang","year":"2023","unstructured":"Zhang, Y. F., Zhang, Z., Li, D., et al. (2023). Learning domain invariant representations for generalizable person re-identification. IEEE Transactions on Image Processing, 32, 509\u2013523.","journal-title":"IEEE Transactions on Image Processing"},{"key":"2548_CR111","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Yang, X., & Yu, Z., et\u00a0al. (2019). Joint discriminative and generative learning for person re-identification. In Proceedings of the IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp 2138\u20132147.","DOI":"10.1109\/CVPR.2019.00224"},{"key":"2548_CR112","doi-asserted-by":"crossref","unstructured":"Zhong, Z., Zheng, L., & Kang, G., et\u00a0al. (2020). Random erasing data augmentation. In Proceedings of the AAAI Conference on Artificial Intelligence, pp 13001\u201313008.","DOI":"10.1609\/aaai.v34i07.7000"},{"key":"2548_CR113","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, Y., & Cavallaro, A., et\u00a0al. (2019). Omni-scale feature learning for person re-identification. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 3701\u20133711.","DOI":"10.1109\/ICCV.2019.00380"},{"key":"2548_CR114","doi-asserted-by":"crossref","unstructured":"Zhou, Z., Liu, H., & Shi, W., et\u00a0al. (2022). A cloth-irrelevant harmonious attention network for cloth-changing person re-identification. In Proc. IEEE Int. Conf. Pattern Recog., pp 989\u2013995.","DOI":"10.1109\/ICPR56361.2022.9956160"},{"key":"2548_CR115","unstructured":"Zhu, D., Chen, J., & Shen, X., et\u00a0al. (2024). MiniGPT-4: Enhancing vision-language understanding with advanced large language models. In Proceedings of the International Conference on Learning Representations, pp 1\u201317."},{"key":"2548_CR116","doi-asserted-by":"crossref","unstructured":"Zhu, K., Guo, H., & Liu, Z., et\u00a0al. (2020). Identity-guided human semantic parsing for person re-identification. In European conference on computer vision, Springer, pp 346\u2013363.","DOI":"10.1007\/978-3-030-58580-8_21"}],"container-title":["International Journal of Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02548-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11263-025-02548-7\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11263-025-02548-7.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T06:27:10Z","timestamp":1762928830000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11263-025-02548-7"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,8,11]]},"references-count":116,"journal-issue":{"issue":"11","published-print":{"date-parts":[[2025,11]]}},"alternative-id":["2548"],"URL":"https:\/\/doi.org\/10.1007\/s11263-025-02548-7","relation":{},"ISSN":["0920-5691","1573-1405"],"issn-type":[{"value":"0920-5691","type":"print"},{"value":"1573-1405","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,8,11]]},"assertion":[{"value":"8 April 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 July 2025","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"11 August 2025","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}]}}