{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,27]],"date-time":"2026-01-27T10:57:45Z","timestamp":1769511465010,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":33,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819787913","type":"print"},{"value":"9789819787920","type":"electronic"}],"license":[{"start":{"date-parts":[[2024,11,9]],"date-time":"2024-11-09T00:00:00Z","timestamp":1731110400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,11,9]],"date-time":"2024-11-09T00:00:00Z","timestamp":1731110400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025]]},"DOI":"10.1007\/978-981-97-8792-0_19","type":"book-chapter","created":{"date-parts":[[2024,11,8]],"date-time":"2024-11-08T06:57:47Z","timestamp":1731049067000},"page":"269-283","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["LLMAction: Adapting Large Language Model for\u00a0Long-Term Action Anticipation"],"prefix":"10.1007","author":[{"given":"Binglu","family":"Wang","sequence":"first","affiliation":[]},{"given":"Yao","family":"Tian","sequence":"additional","affiliation":[]},{"given":"Changhe","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Le","family":"Yang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,11,9]]},"reference":[{"key":"19_CR1","doi-asserted-by":"crossref","unstructured":"Abu\u00a0Farha, Y., Gall, J.: Uncertainty-aware anticipation of activities. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision Workshops (2019)","DOI":"10.1109\/ICCVW.2019.00151"},{"key":"19_CR2","doi-asserted-by":"crossref","unstructured":"Abu\u00a0Farha, Y., Ke, Q., Schiele, B., Gall, J.: Long-term anticipation of activities with cycle consistency. In: Pattern Recognition: 42nd DAGM German Conference, DAGM GCPR 2020, T\u00fcbingen, Germany, September 28\u2013October 1, 2020, Proceedings 42, pp. 159\u2013173. Springer, Berlin (2021)","DOI":"10.1007\/978-3-030-71278-5_12"},{"key":"19_CR3","doi-asserted-by":"crossref","unstructured":"Abu\u00a0Farha, Y., Richard, A., Gall, J.: When will you do what?-anticipating temporal occurrences of activities. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5343\u20135352 (2018)","DOI":"10.1109\/CVPR.2018.00560"},{"key":"19_CR4","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"19_CR5","doi-asserted-by":"publisher","DOI":"10.1016\/j.paerosci.2024.100984","volume":"146","author":"L Chen","year":"2024","unstructured":"Chen, L., Cheng, C., Zhou, C., Zhang, Y., Wu, J.: Flapping rotary wing: a novel low-Reynolds number layout merging bionic features into micro rotors. Prog. Aerosp. Sci. 146, 100984 (2024)","journal-title":"Prog. Aerosp. Sci."},{"key":"19_CR6","doi-asserted-by":"crossref","unstructured":"Girase, H., Agarwal, N., Choi, C., Mangalam, K.: Latency matters: real-time action forecasting transformer. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 18759\u201318769 (2023)","DOI":"10.1109\/CVPR52729.2023.01799"},{"key":"19_CR7","doi-asserted-by":"crossref","unstructured":"Girdhar, R., Grauman, K.: Anticipative video transformer. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 13505\u201313515 (2021)","DOI":"10.1109\/ICCV48922.2021.01325"},{"key":"19_CR8","doi-asserted-by":"crossref","unstructured":"Gong, D., Lee, J., Kim, M., Ha, S.J., Cho, M.: Future transformer for long-term action anticipation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3052\u20133061 (2022)","DOI":"10.1109\/CVPR52688.2022.00306"},{"key":"19_CR9","unstructured":"Gupta, A., Liu, J., Bo, L., Roy-Chowdhury, A.K., Mei, T.: A-act: Action anticipation through cycle transformations (2022). arXiv:2204.00942"},{"key":"19_CR10","unstructured":"Houlsby, N., Giurgiu, A., Jastrzebski, S., Morrone, B., De\u00a0Laroussilhe, Q., Gesmundo, A., Attariyan, M., Gelly, S.: Parameter-efficient transfer learning for NLP. In: International Conference on Machine Learning, pp. 2790\u20132799. PMLR (2019)"},{"key":"19_CR11","unstructured":"Hu, E.J., Wallis, P., Allen-Zhu, Z., Li, Y., Wang, S., Wang, L., Chen, W., et\u00a0al.: Lora: low-rank adaptation of large language models. In: International Conference on Learning Representations (2021)"},{"key":"19_CR12","doi-asserted-by":"crossref","unstructured":"Ke, Q., Fritz, M., Schiele, B.: Time-conditioned action anticipation in one shot. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 9925\u20139934 (2019)","DOI":"10.1109\/CVPR.2019.01016"},{"key":"19_CR13","unstructured":"Kenton, J.D.M.W.C., Toutanova, L.K.: Bert: pre-training of deep bidirectional transformers for language understanding. In: Proceedings of NAACL-HLT, pp. 4171\u20134186 (2019)"},{"key":"19_CR14","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Arslan, A., Serre, T.: The language of actions: recovering the syntax and semantics of goal-directed human activities. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 780\u2013787 (2014)","DOI":"10.1109\/CVPR.2014.105"},{"key":"19_CR15","doi-asserted-by":"crossref","unstructured":"Lester, B., Al-Rfou, R., Constant, N.: The power of scale for parameter-efficient prompt tuning. In: Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing. Association for Computational Linguistics (2021)","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"19_CR16","doi-asserted-by":"crossref","unstructured":"Li, X.L., Liang, P.: Prefix-tuning: optimizing continuous prompts for generation. In: Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (vol. 1: Long Papers), pp. 4582\u20134597 (2021)","DOI":"10.18653\/v1\/2021.acl-long.353"},{"key":"19_CR17","unstructured":"Liu, H., Li, C., Wu, Q., Lee, Y.J.: Visual instruction tuning. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"19_CR18","doi-asserted-by":"crossref","unstructured":"Liu, X., Zheng, Y., Du, Z., Ding, M., Qian, Y., Yang, Z., Tang, J.: GPT understands, too. AI Open (2023)","DOI":"10.1016\/j.aiopen.2023.08.012"},{"key":"19_CR19","unstructured":"Luo, G., Zhou, Y., Ren, T., Chen, S., Sun, X., Ji, R.: Cheap and quick: efficient vision-language instruction tuning for large language models. Adv. Neural Inf. Process. Syst. 36 (2024)"},{"key":"19_CR20","doi-asserted-by":"crossref","unstructured":"Moniruzzaman, M., Yin, Z., He, Z., Leu, M.C., Qin, R.: Jointly-learnt networks for future action anticipation via self-knowledge distillation and cycle consistency. IEEE Trans. Circuits Syst. Video Technol. (2022)","DOI":"10.1109\/TCSVT.2022.3232021"},{"key":"19_CR21","doi-asserted-by":"crossref","unstructured":"Nawhal, M., Jyothi, A.A., Mori, G.: Rethinking learning approaches for long-term action anticipation. In: European Conference on Computer Vision, pp. 558\u2013576. Springer, Berlin (2022)","DOI":"10.1007\/978-3-031-19830-4_32"},{"issue":"8","key":"19_CR22","first-page":"9","volume":"1","author":"A Radford","year":"2019","unstructured":"Radford, A., Wu, J., Child, R., Luan, D., Amodei, D., Sutskever, I., et al.: Language models are unsupervised multitask learners. OpenAI Blog 1(8), 9 (2019)","journal-title":"OpenAI Blog"},{"key":"19_CR23","doi-asserted-by":"crossref","unstructured":"Sener, F., Singhania, D., Yao, A.: Temporal aggregate representations for long-range video understanding. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XVI 16, pp. 154\u2013171. Springer, Berlin (2020)","DOI":"10.1007\/978-3-030-58517-4_10"},{"key":"19_CR24","doi-asserted-by":"crossref","unstructured":"Stein, S., McKenna, S.J.: Combining embedded accelerometers with computer vision for recognizing food preparation activities. In: Proceedings of the 2013 ACM International Joint Conference on Pervasive and Ubiquitous Computing, pp. 729\u2013738 (2013)","DOI":"10.1145\/2493432.2493482"},{"key":"19_CR25","doi-asserted-by":"publisher","DOI":"10.1016\/j.neucom.2023.127063","volume":"568","author":"J Su","year":"2024","unstructured":"Su, J., Ahmed, M., Lu, Y., Pan, S., Bo, W., Liu, Y.: RoFormer: enhanced transformer with rotary position embedding. Neurocomputing 568, 127063 (2024)","journal-title":"Neurocomputing"},{"key":"19_CR26","unstructured":"Touvron, H., Lavril, T., Izacard, G., Martinet, X., Lachaux, M.A., Lacroix, T., Rozi\u00e8re, B., Goyal, N., Hambro, E., Azhar, F., et\u00a0al.: Llama: Open and efficient foundation language models. CoRR (2023). arXiv:2302.13971"},{"key":"19_CR27","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"key":"19_CR28","doi-asserted-by":"publisher","first-page":"503","DOI":"10.1109\/LSP.2021.3061289","volume":"28","author":"B Wang","year":"2021","unstructured":"Wang, B., Yang, L., Zhao, Y.: POLO: learning explicit cross-modality fusion for temporal action localization. IEEE Signal Process. Lett. 28, 503\u2013507 (2021)","journal-title":"IEEE Signal Process. Lett."},{"issue":"4","key":"19_CR29","doi-asserted-by":"publisher","first-page":"2186","DOI":"10.1109\/TCSVT.2021.3089323","volume":"32","author":"B Wang","year":"2021","unstructured":"Wang, B., Zhang, X., Zhao, Y.: Exploring sub-action granularity for weakly supervised temporal action localization. IEEE Trans. Circuits Syst. Video Technol. 32(4), 2186\u20132198 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"19_CR30","doi-asserted-by":"crossref","unstructured":"Wang, B., Zhao, Y., Yang, L., Long, T., Li, X.: Temporal action localization in the deep learning era: a survey. IEEE Trans. Pattern Anal. Mach. Intell. (2023)","DOI":"10.1109\/TPAMI.2023.3330794"},{"key":"19_CR31","doi-asserted-by":"crossref","unstructured":"Wang, S., Zhou, T., Lu, Y., Di, H.: Detail-preserving transformer for light field image super-resolution. In: Proceedings of the AAAI Conference on Artificial Intelligence, vol.\u00a036, pp. 2522\u20132530 (2022)","DOI":"10.1609\/aaai.v36i3.20153"},{"key":"19_CR32","doi-asserted-by":"crossref","unstructured":"Zhang, H., Li, X., Bing, L.: Video-llama: an instruction-tuned audio-visual language model for video understanding. In: Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pp. 543\u2013553 (2023)","DOI":"10.18653\/v1\/2023.emnlp-demo.49"},{"key":"19_CR33","doi-asserted-by":"publisher","first-page":"203","DOI":"10.1016\/j.isprsjprs.2023.12.002","volume":"207","author":"X Zhang","year":"2024","unstructured":"Zhang, X., Li, Y., Li, F., Jiang, H., Wang, Y., Zhang, L., Zheng, L., Ding, Z.: Ship-Go: SAR ship images inpainting via instance-to-image generative diffusion models. ISPRS J. Photogramm. Remote Sens. 207, 203\u2013217 (2024)","journal-title":"ISPRS J. Photogramm. Remote Sens."}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-97-8792-0_19","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,11,8]],"date-time":"2024-11-08T07:08:54Z","timestamp":1731049734000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-97-8792-0_19"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,11,9]]},"ISBN":["9789819787913","9789819787920"],"references-count":33,"URL":"https:\/\/doi.org\/10.1007\/978-981-97-8792-0_19","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,11,9]]},"assertion":[{"value":"9 November 2024","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Urumqi","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2024","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2024","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"20 October 2024","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"7","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2024","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2024.prcv.cn\/","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}