{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T03:04:15Z","timestamp":1780542255899,"version":"3.54.1"},"reference-count":41,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,9,1]],"date-time":"2026-09-01T00:00:00Z","timestamp":1788220800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/501100013804","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100013804","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100003995","name":"Anhui Provincial Natural Science Foundation","doi-asserted-by":"publisher","id":[{"id":"10.13039\/501100003995","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Signal Processing: Image Communication"],"published-print":{"date-parts":[[2026,9]]},"DOI":"10.1016\/j.image.2026.117598","type":"journal-article","created":{"date-parts":[[2026,5,22]],"date-time":"2026-05-22T15:37:54Z","timestamp":1779464274000},"page":"117598","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Parameter-efficient transfer for CLIP-based text-to-person retrieval"],"prefix":"10.1016","volume":"147","author":[{"given":"Hai","family":"Min","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Guanghui","family":"Zhan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Chunxiao","family":"Fan","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Yang","family":"Zhao","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"given":"Wei","family":"Jia","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"78","reference":[{"key":"10.1016\/j.image.2026.117598_b1","series-title":"International Conference on Machine Learning","first-page":"8748","article-title":"Learning transferable visual models from natural language supervision","author":"Radford","year":"2021"},{"key":"10.1016\/j.image.2026.117598_b2","series-title":"International Conference on Machine Learning","first-page":"4904","article-title":"Scaling up visual and vision-language representation learning with noisy text supervision","author":"Jia","year":"2021"},{"issue":"2","key":"10.1016\/j.image.2026.117598_b3","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1145\/3383184","article-title":"Dual-path convolutional image-text embeddings with instance loss","volume":"16","author":"Zheng","year":"2020","journal-title":"ACM Trans. Multimed. Comput. Commun. Appl. (TOMM)"},{"key":"10.1016\/j.image.2026.117598_b4","doi-asserted-by":"crossref","first-page":"171","DOI":"10.1016\/j.neucom.2022.04.081","article-title":"TIPCB: A simple but effective part-based convolutional baseline for text-based person search","volume":"494","author":"Chen","year":"2022","journal-title":"Neurocomputing"},{"key":"10.1016\/j.image.2026.117598_b5","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2023.110253","article-title":"Text-based person search via local-relational-global fine grained alignment","volume":"262","author":"Zhou","year":"2023","journal-title":"Knowl.-Based Syst."},{"key":"10.1016\/j.image.2026.117598_b6","doi-asserted-by":"crossref","unstructured":"X. Han, S. He, L. Zhang, T. Xiang, Text-Based Person Search with Limited Data, in: British Machine Vision Conference, 2021, pp. 1\u201313.","DOI":"10.5244\/C.35.10"},{"key":"10.1016\/j.image.2026.117598_b7","doi-asserted-by":"crossref","first-page":"6032","DOI":"10.1109\/TIP.2023.3327924","article-title":"Clip-driven fine-grained text-image person re-identification","volume":"32","author":"Yan","year":"2023","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.image.2026.117598_b8","doi-asserted-by":"crossref","unstructured":"Y. Qin, Y. Chen, D. Peng, X. Peng, J.T. Zhou, P. Hu, Noisy-correspondence learning for text-to-image person re-identification, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 27197\u201327206.","DOI":"10.1109\/CVPR52733.2024.02568"},{"key":"10.1016\/j.image.2026.117598_b9","doi-asserted-by":"crossref","unstructured":"D. Jiang, M. Ye, Cross-modal implicit relation reasoning and aligning for text-to-image person retrieval, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 2787\u20132797.","DOI":"10.1109\/CVPR52729.2023.00273"},{"key":"10.1016\/j.image.2026.117598_b10","series-title":"International Conference on Machine Learning","first-page":"2790","article-title":"Parameter-efficient transfer learning for NLP","author":"Houlsby","year":"2019"},{"issue":"2","key":"10.1016\/j.image.2026.117598_b11","first-page":"3","article-title":"Lora: Low-rank adaptation of large language models","volume":"1","author":"Hu","year":"2022","journal-title":"ICLR"},{"key":"10.1016\/j.image.2026.117598_b12","doi-asserted-by":"crossref","unstructured":"B. Lester, R. Al-Rfou, N. Constant, The Power of Scale for Parameter-Efficient Prompt Tuning, in: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing, 2021, pp. 3045\u20133059.","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"10.1016\/j.image.2026.117598_b13","first-page":"10890","article-title":"R-drop: Regularized dropout for neural networks","volume":"34","author":"Wu","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.image.2026.117598_b14","doi-asserted-by":"crossref","unstructured":"S. Li, T. Xiao, H. Li, B. Zhou, D. Yue, X. Wang, Person search with natural language description, in: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2017, pp. 1970\u20131979.","DOI":"10.1109\/CVPR.2017.551"},{"key":"10.1016\/j.image.2026.117598_b15","doi-asserted-by":"crossref","unstructured":"S. Sun, Y.-C. Chen, L. Li, S. Wang, Y. Fang, J. Liu, Lightningdot: Pre-training visual-semantic embeddings for real-time image-text retrieval, in: Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2021, pp. 982\u2013997.","DOI":"10.18653\/v1\/2021.naacl-main.77"},{"key":"10.1016\/j.image.2026.117598_b16","doi-asserted-by":"crossref","unstructured":"A. Miech, J.-B. Alayrac, I. Laptev, J. Sivic, A. Zisserman, Thinking fast and slow: Efficient text-to-visual retrieval with transformers, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2021, pp. 9826\u20139836.","DOI":"10.1109\/CVPR46437.2021.00970"},{"key":"10.1016\/j.image.2026.117598_b17","doi-asserted-by":"crossref","unstructured":"S. He, H. Luo, P. Wang, F. Wang, H. Li, W. Jiang, Transreid: Transformer-based object re-identification, in: Proceedings of the IEEE\/CVF International Conference on Computer Vision, 2021, pp. 15013\u201315022.","DOI":"10.1109\/ICCV48922.2021.01474"},{"key":"10.1016\/j.image.2026.117598_b18","doi-asserted-by":"crossref","unstructured":"H. Luo, Y. Gu, X. Liao, S. Lai, W. Jiang, Bag of tricks and a strong baseline for deep person re-identification, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition Workshops, 2019.","DOI":"10.1109\/CVPRW.2019.00190"},{"key":"10.1016\/j.image.2026.117598_b19","doi-asserted-by":"crossref","unstructured":"H. Wang, J. Shen, Y. Liu, Y. Gao, E. Gavves, Nformer: Robust person re-identification with neighbor transformer, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 7297\u20137307.","DOI":"10.1109\/CVPR52688.2022.00715"},{"key":"10.1016\/j.image.2026.117598_b20","doi-asserted-by":"crossref","unstructured":"S. Li, T. Xiao, H. Li, W. Yang, X. Wang, Identity-aware textual-visual matching with latent co-attention, in: Proceedings of the IEEE International Conference on Computer Vision, 2017, pp. 1890\u20131899.","DOI":"10.1109\/ICCV.2017.209"},{"key":"10.1016\/j.image.2026.117598_b21","series-title":"2018 IEEE Winter Conference on Applications of Computer Vision","first-page":"1879","article-title":"Improving text-based person search by spatial matching and adaptive threshold","author":"Chen","year":"2018"},{"key":"10.1016\/j.image.2026.117598_b22","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2023.109636","article-title":"BDNet: A BERT-based dual-path network for text-to-image cross-modal person re-identification","volume":"141","author":"Liu","year":"2023","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.image.2026.117598_b23","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2024.109421","article-title":"Bottom-up color-independent alignment learning for text\u2013image person re-identification","volume":"138","author":"Du","year":"2024","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.image.2026.117598_b24","doi-asserted-by":"crossref","DOI":"10.1016\/j.patcog.2024.110481","article-title":"Text-based person search via cross-modal alignment learning","volume":"152","author":"Ke","year":"2024","journal-title":"Pattern Recognit."},{"key":"10.1016\/j.image.2026.117598_b25","doi-asserted-by":"crossref","DOI":"10.1016\/j.eswa.2024.124071","article-title":"Full-view salient feature mining and alignment for text-based person search","volume":"251","author":"Xie","year":"2024","journal-title":"Expert Syst. Appl."},{"key":"10.1016\/j.image.2026.117598_b26","doi-asserted-by":"crossref","DOI":"10.1016\/j.engappai.2025.111496","article-title":"Multi-scale feature refinement via perspective scaling and adaptive regularization for text-based person search","volume":"159","author":"Xie","year":"2025","journal-title":"Eng. Appl. Artif. Intell."},{"key":"10.1016\/j.image.2026.117598_b27","doi-asserted-by":"crossref","DOI":"10.1016\/j.jvcir.2024.104219","article-title":"Cross-modal feature learning and alignment network for text\u2013image person re-identification","volume":"103","author":"Huang","year":"2024","journal-title":"J. Vis. Commun. Image Represent."},{"issue":"1","key":"10.1016\/j.image.2026.117598_b28","doi-asserted-by":"crossref","first-page":"38","DOI":"10.1007\/s11633-022-1369-5","article-title":"Vlp: A survey on vision-language pre-training","volume":"20","author":"Chen","year":"2023","journal-title":"Mach. Intell. Res."},{"key":"10.1016\/j.image.2026.117598_b29","article-title":"Vilbert: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks","volume":"32","author":"Lu","year":"2019","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.image.2026.117598_b30","doi-asserted-by":"crossref","unstructured":"J. Devlin, M.-W. Chang, K. Lee, K. Toutanova, Bert: Pre-training of deep bidirectional transformers for language understanding, in: Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), 2019, pp. 4171\u20134186.","DOI":"10.18653\/v1\/N19-1423"},{"key":"10.1016\/j.image.2026.117598_b31","series-title":"Visualbert: A simple and performant baseline for vision and language","author":"Li","year":"2019"},{"key":"10.1016\/j.image.2026.117598_b32","series-title":"International Conference on Machine Learning","first-page":"5986","article-title":"Bert and pals: Projected attention layers for efficient adaptation in multi-task learning","author":"Stickland","year":"2019"},{"key":"10.1016\/j.image.2026.117598_b33","doi-asserted-by":"crossref","unstructured":"Y.-L. Sung, J. Cho, M. Bansal, Vl-adapter: Parameter-efficient transfer learning for vision-and-language tasks, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2022, pp. 5227\u20135237.","DOI":"10.1109\/CVPR52688.2022.00516"},{"key":"10.1016\/j.image.2026.117598_b34","doi-asserted-by":"crossref","unstructured":"X.L. Li, P. Liang, Prefix-Tuning: Optimizing Continuous Prompts for Generation, in: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers), 2021, pp. 4582\u20134597.","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"10.1016\/j.image.2026.117598_b35","doi-asserted-by":"crossref","unstructured":"A. Zhu, Z. Wang, Y. Li, X. Wan, J. Jin, T. Wang, F. Hu, G. Hua, Dssl: Deep surroundings-person separation learning for text-based person retrieval, in: Proceedings of the 29th ACM International Conference on Multimedia, 2021, pp. 209\u2013217.","DOI":"10.1145\/3474085.3475369"},{"key":"10.1016\/j.image.2026.117598_b36","series-title":"ICASSP 2022-2022 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"2724","article-title":"Learning semantic-aligned feature representation for text-based person search","author":"Li","year":"2022"},{"key":"10.1016\/j.image.2026.117598_b37","doi-asserted-by":"crossref","unstructured":"A. Farooq, M. Awais, J. Kittler, S.S. Khalid, Axm-net: Implicit cross-modal feature alignment for person re-identification, in: Proceedings of the AAAI Conference on Artificial Intelligence, Vol. 36, 2022, pp. 4477\u20134485.","DOI":"10.1609\/aaai.v36i4.20370"},{"key":"10.1016\/j.image.2026.117598_b38","doi-asserted-by":"crossref","unstructured":"Z. Shao, X. Zhang, M. Fang, Z. Lin, J. Wang, C. Ding, Learning granularity-unified representations for text-to-image person re-identification, in: Proceedings of the 30th Acm International Conference on Multimedia, 2022, pp. 5566\u20135574.","DOI":"10.1145\/3503161.3548028"},{"key":"10.1016\/j.image.2026.117598_b39","series-title":"European Conference on Computer Vision","first-page":"624","article-title":"See finer, see more: Implicit modality alignment for text-based person retrieval","author":"Shu","year":"2022"},{"key":"10.1016\/j.image.2026.117598_b40","series-title":"ICASSP 2024-2024 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"7935","article-title":"Clip-based synergistic knowledge transfer for text-based person retrieval","author":"Liu","year":"2024"},{"key":"10.1016\/j.image.2026.117598_b41","series-title":"ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing","first-page":"1","article-title":"Prototypical graph alignment for text-based person search","author":"Huang","year":"2025"}],"container-title":["Signal Processing: Image Communication"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0923596526001219?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0923596526001219?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,6,4]],"date-time":"2026-06-04T02:35:26Z","timestamp":1780540526000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0923596526001219"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,9]]},"references-count":41,"alternative-id":["S0923596526001219"],"URL":"https:\/\/doi.org\/10.1016\/j.image.2026.117598","relation":{},"ISSN":["0923-5965"],"issn-type":[{"value":"0923-5965","type":"print"}],"subject":[],"published":{"date-parts":[[2026,9]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Parameter-efficient transfer for CLIP-based text-to-person retrieval","name":"articletitle","label":"Article Title"},{"value":"Signal Processing: Image Communication","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.image.2026.117598","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier B.V. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"117598"}}