{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T02:53:36Z","timestamp":1768272816910,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":19,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556755","type":"print"},{"value":"9789819556762","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5676-2_10","type":"book-chapter","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T20:32:28Z","timestamp":1768249948000},"page":"143-157","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Divide-and-Specialize CLIP: A Text-Guided Multi-expert Framework for\u00a0Fine-Grained Action Recognition"],"prefix":"10.1007","author":[{"ORCID":"https:\/\/orcid.org\/0009-0004-5546-6833","authenticated-orcid":false,"given":"Lingjie","family":"Zeng","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0002-5993-0416","authenticated-orcid":false,"given":"Hailun","family":"Zhang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7296-6770","authenticated-orcid":false,"given":"Xinrui","family":"Wang","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0009-0003-6317-9909","authenticated-orcid":false,"given":"Zhen","family":"Zhai","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4651-7163","authenticated-orcid":false,"given":"Qijun","family":"Zhao","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0241-1156","authenticated-orcid":false,"given":"Hanyang","family":"Lin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,13]]},"reference":[{"key":"10_CR1","unstructured":"Ba, J.L., Kiros, J.R., Hinton, G.E.: Layer normalization. NeurIPS (2016)"},{"key":"10_CR2","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. In: International Conference on Learning Representations (2020)"},{"key":"10_CR3","doi-asserted-by":"crossref","unstructured":"Goyal, R., Ebrahimi\u00a0Kahou, S., Michalski, V., Materzynska, J., Westphal, S., Kim, H., Haenel, V., Fruend, I., Yianilos, P., Mueller-Freitag, M., et\u00a0al.: The\" something something\" video database for learning and evaluating visual common sense. In: Proceedings of the IEEE international conference on computer vision. pp. 5842\u20135850 (2017)","DOI":"10.1109\/ICCV.2017.622"},{"key":"10_CR4","doi-asserted-by":"publisher","unstructured":"Jia, K., Ma, Z., Zhu, R., Li, Y.: Attention-mechanism-based light single shot multibox detector modelling improvement for small object detection on the sea surface. Journal of Image and Graphics 27(4), 1161\u20131175 (2022). https:\/\/doi.org\/10.11834\/jig.200517","DOI":"10.11834\/jig.200517"},{"key":"10_CR5","doi-asserted-by":"crossref","unstructured":"Ju, C., Han, T., Zheng, K., Zhang, Y., Xie, W.: Prompting visual-language models for efficient video understanding. In: European Conference on Computer Vision. pp. 105\u2013124. Springer (2022)","DOI":"10.1007\/978-3-031-19833-5_7"},{"key":"10_CR6","unstructured":"Kay, W., Carreira, J., et\u00a0al.: The kinetics human action video dataset. arXiv:1705.06950 (2017)"},{"key":"10_CR7","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. ICLR (2015)"},{"key":"10_CR8","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., et\u00a0al.: Hmdb: a large video database for human motion recognition. In: ICCV (2011)","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"10_CR9","doi-asserted-by":"crossref","unstructured":"Li, Y., Li, Y., et\u00a0al.: Resound: Towards action recognition without representation bias. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01231-1_32"},{"key":"10_CR10","doi-asserted-by":"crossref","unstructured":"Lin, Z., Geng, S., Zhang, R., Gao, P., De\u00a0Melo, G., Wang, X., Dai, J., Qiao, Y., Li, H.: Frozen clip models are efficient video learners. In: European Conference on Computer Vision. pp. 388\u2013404. Springer (2022)","DOI":"10.1007\/978-3-031-19833-5_23"},{"key":"10_CR11","unstructured":"M\u00fcllner, D.: Modern hierarchical, agglomerative clustering algorithms. arXiv e-prints pp. arXiv\u20131109 (2011)"},{"key":"10_CR12","unstructured":"Radford, A., Kim, J.W., Hallacy, C., Ramesh, A., Goh, G., Agarwal, S., Sastry, G., Askell, A., Mishkin, P., Clark, J., et\u00a0al.: Learning transferable visual models from natural language supervision. In: International conference on machine learning. pp. 8748\u20138763. PmLR (2021)"},{"key":"10_CR13","unstructured":"Wang, M., Xing, J., Mei, J., Liu, Y., Jiang, Y.: Actionclip: Adapting language-image pretrained models for video action recognition. IEEE Transactions on Neural Networks and Learning Systems (2023)"},{"issue":"2","key":"10_CR14","doi-asserted-by":"publisher","first-page":"392","DOI":"10.1007\/s11263-023-01876-w","volume":"132","author":"W Wu","year":"2024","unstructured":"Wu, W., Sun, Z., Song, Y., Wang, J., Ouyang, W.: Transferring vision-language models for visual recognition: A classifier perspective. Int. J. Comput. Vision 132(2), 392\u2013409 (2024)","journal-title":"Int. J. Comput. Vision"},{"key":"10_CR15","doi-asserted-by":"publisher","unstructured":"Yan, Z., Hou, Z., Xiong, L., Liu, X., Yu, W., Ma, S.: Fine-grained classification based on bilinear feature fusion and yolov3. Journal of Image and Graphics 26(4), 847\u2013856 (2021). https:\/\/doi.org\/10.11834\/jig.200031","DOI":"10.11834\/jig.200031"},{"key":"10_CR16","unstructured":"Yang, T., Zhu, Y., Xie, Y., Zhang, A., Chen, C., Li, M.: Aim: Adapting image models for efficient video action recognition. In: The Eleventh International Conference on Learning Representations"},{"key":"10_CR17","doi-asserted-by":"crossref","unstructured":"Zhai, Z., Zhang, H., Zhao, Q., Fu, K.: Species-aware guidance for animal action recognition with vision-language knowledge. In: Chinese Conference on Pattern Recognition and Computer Vision (PRCV). pp. 570\u2013583. Springer (2024)","DOI":"10.1007\/978-981-97-8511-7_40"},{"key":"10_CR18","doi-asserted-by":"crossref","unstructured":"Zhang, H., Wang, X., Zhao, Q.: Granularity-aware contrastive learning for fine-grained action recognition. In: ICASSP 2025-2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). pp.\u00a01\u20135. IEEE (2025)","DOI":"10.1109\/ICASSP49660.2025.10889703"},{"key":"10_CR19","doi-asserted-by":"crossref","unstructured":"Zhang, H., Zeng, Z., Zhao, Q., Zhai, Z.: Concap: contrastive context-aware prompt for resource-hungry action recognition. In: 2023 IEEE International Conference on Multimedia and Expo (ICME). pp. 768\u2013773. IEEE (2023)","DOI":"10.1109\/ICME55011.2023.00137"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5676-2_10","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T20:32:31Z","timestamp":1768249951000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5676-2_10"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556755","9789819556762"],"references-count":19,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5676-2_10","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"13 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}