{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T02:43:16Z","timestamp":1768272196268,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":39,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556755","type":"print"},{"value":"9789819556762","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5676-2_12","type":"book-chapter","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T20:32:06Z","timestamp":1768249926000},"page":"174-187","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["Few-Shot Action Recognition Based on\u00a0Visual-Language Prototype Hierarchical Temporal Enhancement"],"prefix":"10.1007","author":[{"given":"Bingbing","family":"Zhang","sequence":"first","affiliation":[]},{"given":"Yuanchen","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Meng","family":"Li","sequence":"additional","affiliation":[]},{"given":"Jianxin","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Qiang","family":"Zhang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2026,1,13]]},"reference":[{"key":"12_CR1","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1109\/TMM.2023.3263288","volume":"26","author":"Z Qing","year":"2023","unstructured":"Qing, Z., et al.: MAR: masked autoencoders for efficient action recognition. IEEE Trans. Multimedia 26, 218\u2013233 (2023)","journal-title":"IEEE Trans. Multimedia"},{"key":"12_CR2","doi-asserted-by":"crossref","unstructured":"Huang, P., Yan, R., Shu, X., Tu, Z., Dai, G., Tang, J.: Semantic-Disentangled Transformer With Noun-Verb Embedding for Compositional Action Recognition. IEEE Transactions on Image Processing (2023)","DOI":"10.1109\/TIP.2023.3341297"},{"key":"12_CR3","doi-asserted-by":"crossref","unstructured":"Jian, Y., Torresani, L.: Label hallucination for few-shot classification. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol. 36(6), pp. 7005\u20137014 (2022)","DOI":"10.1609\/aaai.v36i6.20659"},{"key":"12_CR4","doi-asserted-by":"crossref","unstructured":"Chen, Z., Fu, Y., Wang, Y.X., Ma, L., Liu, W., Hebert, M.: Image deformation meta-networks for one-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8680\u20138689 (2019)","DOI":"10.1109\/CVPR.2019.00888"},{"key":"12_CR5","doi-asserted-by":"crossref","unstructured":"Li, K., Zhang, Y., Li, K., Fu, Y.: Adversarial feature hallucination networks for few-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 13470\u201313479 (2020)","DOI":"10.1109\/CVPR42600.2020.01348"},{"key":"12_CR6","doi-asserted-by":"crossref","unstructured":"Jamal, M.A., Qi, G.J.: Task agnostic meta-learning for few-shot learning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 11719\u201311727 (2019)","DOI":"10.1109\/CVPR.2019.01199"},{"key":"12_CR7","unstructured":"Finn, C., Abbeel, P., Levine, S.: Model-agnostic meta-learning for fast adaptation of deep networks. In: International Conference on Machine Learning, pp. 1126\u20131135 (2017)"},{"key":"12_CR8","doi-asserted-by":"crossref","unstructured":"Ye, H.J., Hu, H., Zhan, D.C., Sha, F.: Few-shot learning via embedding adaptation with set-to-set functions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 8808\u20138817 (2020)","DOI":"10.1109\/CVPR42600.2020.00883"},{"key":"12_CR9","unstructured":"Vinyals, O., Blundell, C., Lillicrap, T., Wierstra, D., et al.: Matching networks for one shot learning. Adv. Neural Inf. Process. Syst. 29 (2016)"},{"key":"12_CR10","unstructured":"Snell, J., Swersky, K., Zemel, R.: Prototypical networks for few-shot learning. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"12_CR11","doi-asserted-by":"crossref","unstructured":"Pahde, F., Puscas, M., Klein, T., Nabi, M.: Multimodal prototypical networks for few-shot learning. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 2644\u20132653 (2021)","DOI":"10.1109\/WACV48630.2021.00269"},{"key":"12_CR12","doi-asserted-by":"crossref","unstructured":"Cao, K., Ji, J., Cao, Z., Chang, C.Y., Niebles, J.C.: Few-shot video classification via temporal alignment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 10618\u201310627 (2020)","DOI":"10.1109\/CVPR42600.2020.01063"},{"key":"12_CR13","doi-asserted-by":"crossref","unstructured":"Perrett, T., Masullo, A., Burghardt, T., Mirmehdi, M., Damen, D.: Temporal-relational crosstransformers for few-shot action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 475\u2013484 (2021)","DOI":"10.1109\/CVPR46437.2021.00054"},{"key":"12_CR14","doi-asserted-by":"crossref","unstructured":"Wu, J., Zhang, T., Zhang, Z., Wu, F., Zhang, Y.: Motion-modulated temporal fragment alignment network for few-shot action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9151\u20139160 (2022)","DOI":"10.1109\/CVPR52688.2022.00894"},{"key":"12_CR15","doi-asserted-by":"crossref","unstructured":"Thatipelli, A., Narayan, S., Khan, S., Anwer, R.M., Khan, F.S., Ghanem, B.: Spatio-temporal relation modeling for few-shot action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19958\u201319967 (2022)","DOI":"10.1109\/CVPR52688.2022.01933"},{"key":"12_CR16","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: MoLo: motion-augmented long-short contrastive learning for few-shot action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18011\u201318021 (2023)","DOI":"10.1109\/CVPR52729.2023.01727"},{"key":"12_CR17","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107348","volume":"108","author":"L Zhang","year":"2020","unstructured":"Zhang, L., Chang, X., Liu, J., Luo, M., Prakash, M., Hauptmann, A.G.: Few-shot activity recognition with cross-modal memory network. Pattern Recogn. 108, 107348 (2020)","journal-title":"Pattern Recogn."},{"key":"12_CR18","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Semantic-guided relation propagation network for few-shot action recognition. In: Proceedings of the 29th ACM International Conference on Multimedia, pp. 816\u2013825 (2021)","DOI":"10.1145\/3474085.3475253"},{"issue":"10","key":"12_CR19","doi-asserted-by":"publisher","first-page":"5932","DOI":"10.1109\/TCSVT.2023.3262670","volume":"33","author":"X Wang","year":"2023","unstructured":"Wang, X., et al.: Task-aware dual-representation network for few-shot action recognition. IEEE Trans. Circuits Syst. Video Technol. 33(10), 5932\u20135946 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"12_CR20","unstructured":"Jin, R., Wang, X., Wang, G., Lu, Y., Hu, H.M., Wang, H.: Embedding adaptation network with transformer for few-shot action recognition. In: Asian Conference on Machine Learning, pp. 515\u2013530 (2023)"},{"issue":"6","key":"12_CR21","doi-asserted-by":"publisher","first-page":"1899","DOI":"10.1007\/s11263-023-01917-4","volume":"132","author":"X Wang","year":"2024","unstructured":"Wang, X., et al.: CLIP-guided prototype modulating for few-shot action recognition. Int. J. Comput. Vision 132(6), 1899\u20131912 (2024)","journal-title":"Int. J. Comput. Vision"},{"key":"12_CR22","doi-asserted-by":"crossref","unstructured":"Ni, X., Liu, Y., Wen, H., Ji, Y., Xiao, J., Yang, Y.: Multimodal prototype-enhanced network for few-shot action recognition. In: Proceedings of the 2024 International Conference on Multimedia Retrieval, pp. 1\u201310 (2024)","DOI":"10.1145\/3652583.3658044"},{"key":"12_CR23","doi-asserted-by":"crossref","unstructured":"Qu, H., Yan, R., Shu, X., Gao, H., Huang, P., Xie, G.S.: MVP-Shot: Multi-Velocity Progressive-Alignment Framework for Few-Shot Action Recognition. arXiv preprint arXiv:2405.02077 (2024)","DOI":"10.1109\/TMM.2025.3586118"},{"key":"12_CR24","doi-asserted-by":"crossref","unstructured":"Goyal, R., et al.: The \u201csomething something\u201d video database for learning and evaluating visual common sense. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 5842\u20135850 (2017)","DOI":"10.1109\/ICCV.2017.622"},{"key":"12_CR25","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"12_CR26","unstructured":"Soomro, K.: UCF101: A dataset of 101 human actions classes from videos in the wild. arXiv preprint arXiv:1212.0402 (2012)"},{"key":"12_CR27","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., Garrote, E., Poggio, T., Serre, T.: HMDB: a large video database for human motion recognition. In: 2011 International Conference on Computer Vision, pp. 2556\u20132563 (2011)","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"12_CR28","doi-asserted-by":"crossref","unstructured":"Li, Y., Li, Y., Vasconcelos, N.: Resound: towards action recognition without representation bias. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 513\u2013528 (2018)","DOI":"10.1007\/978-3-030-01231-1_32"},{"key":"12_CR29","doi-asserted-by":"crossref","unstructured":"Zhu, L., Yang, Y.: Compound memory networks for few-shot video classification. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 751\u2013766 (2018)","DOI":"10.1007\/978-3-030-01234-2_46"},{"key":"12_CR30","doi-asserted-by":"crossref","unstructured":"Zhang, H., Zhang, L., Qi, X., Li, H., Torr, P.H., Koniusz, P.: Few-shot action recognition with permutation-invariant attention. In: Computer Vision\u2013ECCV 2020, pp. 525\u2013542 (2020)","DOI":"10.1007\/978-3-030-58558-7_31"},{"key":"12_CR31","doi-asserted-by":"crossref","unstructured":"Shi, Y., Wu, X., Lin, H., Luo, J.: Commonsense Knowledge Prompting for Few-shot Action Recognition in Videos. IEEE Transactions on Multimedia (2024)","DOI":"10.1109\/TMM.2024.3361157"},{"issue":"10","key":"12_CR32","doi-asserted-by":"publisher","first-page":"6955","DOI":"10.1109\/TCSVT.2022.3175923","volume":"32","author":"S Liu","year":"2022","unstructured":"Liu, S., Jiang, M., Kong, J.: Multidimensional prototype refactor enhanced network for few-shot action recognition. IEEE Trans. Circuits Syst. Video Technol. 32(10), 6955\u20136966 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"12_CR33","doi-asserted-by":"crossref","unstructured":"Liu, X., Zhang, H., Pirsiavash, H.: MASTAF: a model-agnostic spatio-temporal attention fusion network for few-shot video classification. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision, pp. 2508\u20132517 (2023)","DOI":"10.1109\/WACV56688.2023.00254"},{"key":"12_CR34","unstructured":"Radford, A., et al.: Learning transferable visual models from natural language supervision. In: International Conference on Machine Learning, pp. 8748\u20138763 (2021)"},{"key":"12_CR35","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2024.128649","volume":"611","author":"F Guo","year":"2025","unstructured":"Guo, F., Wang, Y., Qi, H., Zhu, L., Sun, J.: Consistency Prototype Module and Motion Compensation for few-shot action recognition (CLIP-CPM2C). Neurocomputing 611, 128649 (2025)","journal-title":"Neurocomputing"},{"key":"12_CR36","doi-asserted-by":"crossref","unstructured":"Zhang, Y., et al.: On the importance of spatial relations for few-shot action recognition. In: Proceedings of the 31st ACM International Conference on Multimedia, pp. 2243\u20132251 (2023)","DOI":"10.1145\/3581783.3612192"},{"key":"12_CR37","doi-asserted-by":"crossref","unstructured":"Xing, J., et al.: Boosting few-shot action recognition with graph-guided hybrid matching. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 1740\u20131750 (2023)","DOI":"10.1109\/ICCV51070.2023.00167"},{"key":"12_CR38","unstructured":"Wang, M., Xing, J., Mei, J., Liu, Y., Jiang, Y.: ActionCLIP: adapting language-image pretrained models for video action recognition. IEEE Transactions on Neural Networks and Learning Systems (2023)"},{"key":"12_CR39","doi-asserted-by":"crossref","unstructured":"Wang, X., et al.: Hybrid relation guided set matching for few-shot action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19948\u201319957 (2022)","DOI":"10.1109\/CVPR52688.2022.01932"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5676-2_12","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T20:32:13Z","timestamp":1768249933000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5676-2_12"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556755","9789819556762"],"references-count":39,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5676-2_12","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"13 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}