{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,23]],"date-time":"2026-01-23T05:30:44Z","timestamp":1769146244837,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":34,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819557158","type":"print"},{"value":"9789819557165","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5716-5_4","type":"book-chapter","created":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T13:07:12Z","timestamp":1769087232000},"page":"49-63","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Text-to-Image Person Re-identification via\u00a0Optimal Transport-Based Priority Distribution"],"prefix":"10.1007","author":[{"given":"Yihan","family":"Huo","sequence":"first","affiliation":[]},{"given":"Rui","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Jing","family":"Dong","sequence":"additional","affiliation":[]},{"given":"Pengfei","family":"Yi","sequence":"additional","affiliation":[]},{"given":"Xiaoyong","family":"Fang","sequence":"additional","affiliation":[]},{"given":"Ling","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,23]]},"reference":[{"key":"4_CR1","doi-asserted-by":"crossref","unstructured":"Zhang, Y., Lu, H.: Deep cross-modal projection learning for image-text matching. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 686\u2013701 (2018)","DOI":"10.1007\/978-3-030-01246-5_42"},{"key":"4_CR2","doi-asserted-by":"crossref","unstructured":"Zheng, Z., Zheng, L., Garrett, M., Yang, Y., Xu, M., Shen, Y.-D.: Dual-path convolutional image-text embeddings with instance loss. ACM Trans. Multimedia Comput. Commun. Appl. (TOMM) 16(2), 1\u201323 (2020)","DOI":"10.1145\/3383184"},{"key":"4_CR3","doi-asserted-by":"publisher","first-page":"171","DOI":"10.1016\/j.neucom.2022.04.081","volume":"494","author":"Y Chen","year":"2022","unstructured":"Chen, Y., Zhang, G., Lu, Y., Wang, Z., Zheng, Y.: TIPCB: a simple but effective part-based convolutional baseline for text-based person search. Neurocomputing 494, 171\u2013181 (2022)","journal-title":"Neurocomputing"},{"key":"4_CR4","unstructured":"Ding, Z., Ding, C., Shao, Z., Tao, D.: Semantically self-aligned network for text-to-image part-aware person re-identification. arXiv preprint arXiv:2107.12666 (2021)"},{"key":"4_CR5","doi-asserted-by":"crossref","unstructured":"Jing, Y., Si, C., Wang, J., Wang, W., Wang, L., Tan, T.: Pose-guided multi-granularity attention network for text-based person search. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 34, no. 07, pp. 11189\u201311196 (2020)","DOI":"10.1609\/aaai.v34i07.6777"},{"key":"4_CR6","series-title":"Lecture Notes in Computer Science","doi-asserted-by":"publisher","first-page":"402","DOI":"10.1007\/978-3-030-58610-2_24","volume-title":"Computer Vision \u2013 ECCV 2020","author":"Z Wang","year":"2020","unstructured":"Wang, Z., Fang, Z., Wang, J., Yang, Y.: ViTAA: visual-textual attributes alignment in person search by natural language. In: Vedaldi, A., Bischof, H., Brox, T., Frahm, J.-M. (eds.) ECCV 2020. LNCS, vol. 12357, pp. 402\u2013420. Springer, Cham (2020). https:\/\/doi.org\/10.1007\/978-3-030-58610-2_24"},{"key":"4_CR7","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763 (2021)"},{"key":"4_CR8","unstructured":"Monge, G.: M\u00e9moire sur la th\u00e9orie des d\u00e9blais et des remblais. Mem. Math. Phys. Acad. Royale Sci. 666\u2013704 (1781)"},{"key":"4_CR9","unstructured":"Zhu, Y., Ji, Y., Zhao, Z., et al.: AWT: Transferring vision-language models via augmentation, weighting, and transportation. arXiv preprint arXiv:2407.04603 (2024)"},{"key":"4_CR10","doi-asserted-by":"crossref","unstructured":"Li, S., Xiao, T., Li, H., Zhou, B., Yue, D., Wang, X.: Person search with natural language description. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 1970\u20131979 (2017)","DOI":"10.1109\/CVPR.2017.551"},{"key":"4_CR11","doi-asserted-by":"crossref","unstructured":"Zhu, A., et al.: DSSL: deep surroundings-person separation learning for text-based person retrieval. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 209\u2013217 (2021)","DOI":"10.1145\/3474085.3475369"},{"key":"4_CR12","doi-asserted-by":"crossref","unstructured":"Shu, X., et al.: See finer, see more: implicit modality alignment for text-based person retrieval. In: European Conference on Computer Vision, pp. 624\u2013641 (2022). Springer","DOI":"10.1007\/978-3-031-25072-9_42"},{"key":"4_CR13","doi-asserted-by":"crossref","unstructured":"Yan, S., Dong, N., Zhang, L., Tang, J.: Clip-driven fine-grained text-image person re-identification. IEEE Transactions on Image Processing (2023)","DOI":"10.1109\/TIP.2023.3327924"},{"key":"4_CR14","doi-asserted-by":"crossref","unstructured":"Chen, Y.-C., et al.: UNITER: universal image-text representation learning. In: European Conference on Computer Vision, pp. 104\u2013120 (2020). Springer","DOI":"10.1007\/978-3-030-58577-8_7"},{"key":"4_CR15","doi-asserted-by":"crossref","unstructured":"Mu, N., Kirillov, A., Wagner, D., Xie, S.: SLIP: self-supervision meets language-image pre-training. In: European Conference on Computer Vision, pp. 529\u2013544 (2022). Springer","DOI":"10.1007\/978-3-031-19809-0_30"},{"key":"4_CR16","unstructured":"Yao, L., et al.: FILIP: Fine-grained interactive language-image pre-training. arXiv preprint arXiv:2111.07783 (2021)"},{"key":"4_CR17","doi-asserted-by":"crossref","unstructured":"Gao, P., et al.: Clip-adapter: better vision-language models with feature adapters. Int. J. Comput. Vis. 132(2), 581\u2013595 (2024). Springer","DOI":"10.1007\/s11263-023-01891-x"},{"key":"4_CR18","unstructured":"Wang, D., Shelhamer, E., Liu, S., Olshausen, B., Darrell, T.: Tent: fully test-time adaptation by entropy minimization. In: International Conference on Learning Representations (2020)"},{"key":"4_CR19","doi-asserted-by":"publisher","first-page":"03","DOI":"10.1007\/s10958-006-0049-2","volume":"133","author":"L Kantorovich","year":"2006","unstructured":"Kantorovich, L.: On the translocation of masses. J. Math. Sci. 133, 03 (2006). https:\/\/doi.org\/10.1007\/s10958-006-0049-2","journal-title":"J. Math. Sci."},{"key":"4_CR20","unstructured":"Cuturi, M.: Sinkhorn distances: lightspeed computation of optimal transport. Adv. Neural Inf. Process. Syst. 26 (2013)"},{"key":"4_CR21","doi-asserted-by":"crossref","unstructured":"Jiang, D., Ye, M.: Cross-modal implicit relation reasoning and aligning for text to-image person retrieval. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 2787\u20132797 (2023)","DOI":"10.1109\/CVPR52729.2023.00273"},{"issue":"6","key":"4_CR22","doi-asserted-by":"publisher","first-page":"2872","DOI":"10.1109\/TPAMI.2021.3054775","volume":"44","author":"M Ye","year":"2021","unstructured":"Ye, M., Shen, J., Lin, G., Xiang, T., Shao, L., Hoi, S.C.: Deep learning for person re-identification: a survey and outlook. IEEE Trans. Pattern Anal. Mach. Intell. 44(6), 2872\u20132893 (2021)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"4_CR23","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"4_CR24","doi-asserted-by":"crossref","unstructured":"Sarafianos, N., Xu, X., Kakadiaris, I. A.: Adversarial representation learning for text-to-image matching. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 5814\u20135824 (2019)","DOI":"10.1109\/ICCV.2019.00591"},{"key":"4_CR25","doi-asserted-by":"crossref","unstructured":"Wu, Y., Yan, Z., Han, X., Li, G., Zou, C., Cui, S.: LapsCore: language-guided person search via color reasoning. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1624\u20131633 (2021)","DOI":"10.1109\/ICCV48922.2021.00165"},{"key":"4_CR26","doi-asserted-by":"crossref","unstructured":"Shao, Z., Zhang, X., Fang, M., Lin, Z., Wang, J., Ding, C.: Learning granularity-unified representations for text-to-image person re-identification. In: Proceedings of the 30th ACM International Conference on Multimedia, pp. 5566\u20135574 (2022)","DOI":"10.1145\/3503161.3548028"},{"key":"4_CR27","doi-asserted-by":"crossref","unstructured":"Shu, X., et al.: See finer, see more: implicit modality alignment for text-based person retrieval. In: European Conference on Computer Vision, pp. 624\u2013641 (2022). Springer","DOI":"10.1007\/978-3-031-25072-9_42"},{"key":"4_CR28","doi-asserted-by":"crossref","unstructured":"He, S., Luo, H., Jiang, W., Jiang, X., Ding, H.: VGSG: vision-Guided Semantic-Group Network for Text-Based Person Search. IEEE Trans. Image Process. 33, 163\u2013176 (2023). IEEE","DOI":"10.1109\/TIP.2023.3337653"},{"key":"4_CR29","doi-asserted-by":"crossref","unstructured":"Du, G., Gong, T., Zhang, L.: Contrastive completing learning for practical text\u2013image person ReID: Robuster and cheaper. Expert Syst. Appl. 248, 123399 (2024). Elsevier","DOI":"10.1016\/j.eswa.2024.123399"},{"key":"4_CR30","doi-asserted-by":"crossref","unstructured":"Gan, W., Liu, J., Zhu, Y., Wu, Y., Zhao, G., Zha, Z.-J.: Cross-modal semantic alignment learning for text-based person search. In: International Conference on Multimedia Modeling, pp. 201\u2013215 (2024). Springer","DOI":"10.1007\/978-3-031-53305-1_16"},{"key":"4_CR31","doi-asserted-by":"crossref","unstructured":"Xue, J., Wang, Z., Dong, G.-N., Zhu, A.: EESSO: exploiting extreme and smooth signals via omni-frequency learning for text-based person retrieval. Image Vision Comput. 142, 104912 (2024). Elsevier","DOI":"10.1016\/j.imavis.2024.104912"},{"key":"4_CR32","unstructured":"Li, W., Tan, L., Dai, P., Zhang, Y.: Prompt Decoupling for Text-to-Image Person Re-identification. arXiv preprint arXiv:2401.02173 (2024)"},{"key":"4_CR33","doi-asserted-by":"crossref","unstructured":"Cao, M., Bai, Y., Zeng, Z., Ye, M., Zhang, M.: An empirical study of clip for text-based person search. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, no. 1, pp. 465\u2013473 (2024)","DOI":"10.1609\/aaai.v38i1.27801"},{"key":"4_CR34","doi-asserted-by":"crossref","unstructured":"Zhao, Z., Liu, B., Lu, Y., Chu, Q., Yu, N.: Unifying multi-modal uncertainty modeling and semantic alignment for text-to-image person re-identification. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 38, no. 7, pp. 7534\u20137542 (2024)","DOI":"10.1609\/aaai.v38i7.28585"}],"container-title":["Lecture Notes in Computer Science","Web and Big Data"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5716-5_4","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,22]],"date-time":"2026-01-22T13:07:41Z","timestamp":1769087261000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5716-5_4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819557158","9789819557165"],"references-count":34,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5716-5_4","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"23 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"The authors have no competing interests to declare that are relevant to the content of this article.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Disclosure of Interests"}},{"value":"APWeb-WAIM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Asia-Pacific Web (APWeb) and Web-Age Information Management (WAIM) Joint International Conference on Web and Big Data","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shenyang","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"28 August 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"30 August 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"apwebwaim2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/apweb2025.sau.edu.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}