{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,26]],"date-time":"2025-03-26T10:09:10Z","timestamp":1742983750885,"version":"3.40.3"},"publisher-location":"Singapore","reference-count":37,"publisher":"Springer Nature Singapore","isbn-type":[{"type":"print","value":"9789819620609"},{"type":"electronic","value":"9789819620616"}],"license":[{"start":{"date-parts":[[2024,12,31]],"date-time":"2024-12-31T00:00:00Z","timestamp":1735603200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,12,31]],"date-time":"2024-12-31T00:00:00Z","timestamp":1735603200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-96-2061-6_12","type":"book-chapter","created":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T05:46:04Z","timestamp":1735537564000},"page":"155-168","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Integrating S1 &amp;S2 Framework for\u00a0Enhanced Semantic Match in\u00a0Person Re-identification"],"prefix":"10.1007","author":[{"given":"Xiukang","family":"Yang","sequence":"first","affiliation":[]},{"given":"Jingguo","family":"Ge","sequence":"additional","affiliation":[]},{"given":"Hui","family":"Li","sequence":"additional","affiliation":[]},{"given":"Liangxiong","family":"Li","sequence":"additional","affiliation":[]},{"given":"Bingzhen","family":"Wu","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,12,31]]},"reference":[{"key":"12_CR1","doi-asserted-by":"crossref","unstructured":"Aich, A., Zheng, M., Karanam, S., Chen, T., Roy-Chowdhury, A.K., Wu, Z.: Spatio-temporal representation factorization for video-based person re-identification. In: International Conference on Computer Vision, ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00022"},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"Bai, S., Ma, B., Chang, H., Huang, R., Chen, X.: Salient-to-broad transition for video person re-identification. In: Computer Vision and Pattern Recognition, CVPR (2022)","DOI":"10.1109\/CVPR52688.2022.00719"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Changpinyo, S., Sharma, P., Ding, N., Soricut, R.: Conceptual 12M: pushing web-scale image-text pre-training to recognize long-tail visual concepts. In: Computer Vision and Pattern Recognition, CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00356"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Chen, H., Lagadec, B., Bremond, F.: ICE: inter-instance contrastive encoding for unsupervised person re-identification. In: International Conference on Computer Vision, ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01469"},{"key":"12_CR5","unstructured":"Chen, J., et al.: MiniGPT-v2: large language model as a unified interface for vision-language multi-task learning. https:\/\/arxiv.org\/abs\/2310.09478 (2023)"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Chen, T., et al.: ABD-Net: attentive but diverse person re-identification. In: International Conference on Computer Vision, ICCV (2019)","DOI":"10.1109\/ICCV.2019.00844"},{"key":"12_CR7","doi-asserted-by":"crossref","unstructured":"Chen, X., et al.: Salience-guided cascaded suppression network for person re-identification. In: Computer Vision and Pattern Recognition, CVPR (2020)","DOI":"10.1109\/CVPR42600.2020.00336"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Chum, O., Philbin, J., Sivic, J., Isard, M., Zisserman, A.: Total recall: automatic query expansion with a generative feature model for object retrieval. In: International Conference on Computer Vision, ICCV (2007)","DOI":"10.1109\/ICCV.2007.4408891"},{"key":"12_CR9","unstructured":"Dai, W., et al.: InstructBLIP: towards general-purpose vision-language models with instruction tuning. In: Conference on Neural Information Processing Systems, NeurIPS (2023)"},{"key":"12_CR10","doi-asserted-by":"crossref","unstructured":"Dai, Z., Wang, G., Yuan, W., Zhu, S., Tan, P.: Cluster contrast for unsupervised person re-identification. In: Proceedings of the Asian Conference on Computer Vision, ACCV (2022)","DOI":"10.1007\/978-3-031-26351-4_20"},{"key":"12_CR11","unstructured":"Ding, M., et al.: CogView: mastering text-to-image generation via transformers. In: Conference on Neural Information Processing Systems, NeurIPS (2021)"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Eom, C., Lee, G., Lee, J., Ham, B.: Video-based person re-identification with spatial and temporal memory networks. In: International Conference on Computer Vision, ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01182"},{"key":"12_CR13","unstructured":"Ge, Y., Zhu, F., Chen, D., Zhao, R., Li, H.: Self-paced contrastive learning with hybrid memory for domain adaptive object re-ID. In: Conference on Neural Information Processing Systems, NeurIPS (2020)"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"He, S., Luo, H., Wang, P., Wang, F., Li, H., Jiang, W.: TransReID: transformer-based object re-identification. In: International Conference on Computer Vision, ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.01474"},{"key":"12_CR15","doi-asserted-by":"crossref","unstructured":"He, T., Jin, X., Shen, X., Huang, J., Chen, Z., Hua, X.S.: Dense interaction learning for video-based person re-identification. In: International Conference on Computer Vision, ICCV (2021)","DOI":"10.1109\/ICCV48922.2021.00152"},{"key":"12_CR16","doi-asserted-by":"crossref","unstructured":"Hou, R., Chang, H., Ma, B., Huang, R., Shan, S.: BiCnet-TKS: learning efficient spatial-temporal representation for video person re-identification. In: Computer Vision and Pattern Recognition, CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.00205"},{"key":"12_CR17","doi-asserted-by":"crossref","unstructured":"Hou, R., Chang, H., Ma, B., Shan, S., Chen, X.: Temporal complementary learning for video person re-identification. In: European Conference on Computer Vision, ECCV (2020)","DOI":"10.1007\/978-3-030-58595-2_24"},{"key":"12_CR18","unstructured":"Hu, E.J., et al.: LoRA: low-rank adaptation of large language models. In: International Conference on Learning Representations, ICLR (2022)"},{"key":"12_CR19","doi-asserted-by":"crossref","unstructured":"Jin, X., Lan, C., Zeng, W., Wei, G., Chen, Z.: Semantics-aligned representation learning for person re-identification. In: Association for the Advancement of Artificial Intelligence, AAAI (2020)","DOI":"10.1609\/aaai.v34i07.6775"},{"key":"12_CR20","unstructured":"Li, J., Li, D., Savarese, S., Hoi, S.: BLIP-2: bootstrapping language-image pre-training with frozen image encoders and large language models. In: International Conference on Machine Learning, ICML (2023)"},{"key":"12_CR21","doi-asserted-by":"crossref","unstructured":"Lin, T.Y., et al.: Microsoft COCO: common objects in context. In: European Conference on Computer Vision, ECCV (2014)","DOI":"10.1007\/978-3-319-10602-1_48"},{"key":"12_CR22","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. In: Conference on Neural Information Processing Systems, NeurIPS (2023)"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Liu, X., Zhang, P., Yu, C., Lu, H., Yang, X.: Watching you: global-guided reciprocal learning for video-based person re-identification. In: Computer Vision and Pattern Recognition, CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01313"},{"key":"12_CR24","doi-asserted-by":"crossref","unstructured":"Qin, D., Gammeter, S., Bossard, L., Quack, T., Gool, L.V.: Hello neighbor: accurate object retrieval with k-reciprocal nearest neighbors. In: Computer Vision and Pattern Recognition, CVPR (2011)","DOI":"10.1109\/CVPR.2011.5995373"},{"key":"12_CR25","unstructured":"Schuhmann, C., et al.: LAION-5B: an open large-scale dataset for training next generation image-text models. In: Conference on Neural Information Processing Systems, NeurIPS (2022)"},{"key":"12_CR26","doi-asserted-by":"crossref","unstructured":"Sharma, P., Ding, N., Goodman, S., Soricut, R.: Conceptual captions: a cleaned, hypernymed, image alt-text dataset for automatic image captioning. In: Annual Meeting of the Association for Computational Linguistics, ACL (2018)","DOI":"10.18653\/v1\/P18-1238"},{"key":"12_CR27","unstructured":"Shen, X., Lin, Z., Brandt, J., Avidan, S., Wu, Y.: Object retrieval and localization with spatially-constrained similarity measure and k-NN re-ranking. In: Computer Vision and Pattern Recognition, CVPR (2012)"},{"key":"12_CR28","doi-asserted-by":"crossref","unstructured":"Wang, G., Lai, J., Huang, P., Xie, X.: Spatial-temporal person re-identification. In: Association for the Advancement of Artificial Intelligence, AAAI (2019)","DOI":"10.1609\/aaai.v33i01.33018933"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Xuan, S., Zhang, S.: Intra-inter camera similarity for unsupervised person re-identification. In: Computer Vision and Pattern Recognition, CVPR (2021)","DOI":"10.1109\/CVPR46437.2021.01175"},{"key":"12_CR30","doi-asserted-by":"crossref","unstructured":"Ye, M., Chen, J., Leng, Q., Liang, C., Wang, Z., Sun, K.: Coupled-view based ranking optimization for person re-identification. In: ACM International Conference on Multimedia, ACM MM (2015)","DOI":"10.1007\/978-3-319-14445-0_10"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Ye, M., et al.: Person reidentification via ranking aggregation of similarity pulling and dissimilarity pushing. IEEE Trans. Multimedia, TMM (2016)","DOI":"10.1109\/TMM.2016.2605058"},{"key":"12_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, G., Zhang, Y., Zhang, T., Li, B., Pu, S.: PHA: patch-wise high-frequency augmentation for transformer-based person re-identification. In: Computer Vision and Pattern Recognition, CVPR (2023)","DOI":"10.1109\/CVPR52729.2023.01358"},{"key":"12_CR33","unstructured":"Zhao, H., et al.: MMICL: empowering vision-language model with multi-modal in-context learning (2023). https:\/\/arxiv.org\/abs\/2309.07915"},{"key":"12_CR34","unstructured":"Zheng, L., et al.: Judging LLM-as-a-judge with MT-Bench and chatbot arena. In: Conference on Neural Information Processing Systems, NeurIPS (2023)"},{"key":"12_CR35","doi-asserted-by":"crossref","unstructured":"Zhong, Z., Zheng, L., Cao, D., Li, S.: Re-ranking person re-identification with k-reciprocal encoding. In: Computer Vision and Pattern Recognition, CVPR (2017)","DOI":"10.1109\/CVPR.2017.389"},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Zhou, K., Yang, Y., Cavallaro, A., Xiang, T.: Omni-scale feature learning for person re-identification. In: International Conference on Computer Vision, ICCV (2019)","DOI":"10.1109\/ICCV.2019.00380"},{"key":"12_CR37","unstructured":"Zhu, D., Chen, J., Shen, X., Li, X., Elhoseiny, M.: MiniGPT-4: enhancing vision-language understanding with advanced large language models (2023). https:\/\/arxiv.org\/abs\/2304.10592"}],"container-title":["Lecture Notes in Computer Science","MultiMedia Modeling"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-96-2061-6_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,12,30]],"date-time":"2024-12-30T06:04:19Z","timestamp":1735538659000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-96-2061-6_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,31]]},"ISBN":["9789819620609","9789819620616"],"references-count":37,"URL":"https:\/\/doi.org\/10.1007\/978-981-96-2061-6_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2024,12,31]]},"assertion":[{"value":"31 December 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"MMM","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"International Conference on Multimedia Modeling","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Nara","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Japan","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"9 January 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"11 January 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"31","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"mmm2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"https:\/\/mmm2025.net\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}