{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T09:08:35Z","timestamp":1765357715963,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":38,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"funder":[{"name":"Dreams Foundation of Jianghuai Advance Technology Center","award":["2023-ZM01Z001"],"award-info":[{"award-number":["2023-ZM01Z001"]}]},{"name":"Beijing Municipal Science Technology Commission, Administrative Commission of Zhongguancun Science Park","award":["Z231100005923035"],"award-info":[{"award-number":["Z231100005923035"]}]},{"name":"Natural Science Foundation of China","award":["62276242"],"award-info":[{"award-number":["62276242"]}]},{"name":"Anhui Province Key Research and Development Program","award":["202104a05020007"],"award-info":[{"award-number":["202104a05020007"]}]},{"name":"National Aviation Science Foundation","award":["2022Z0710 78001"],"award-info":[{"award-number":["2022Z0710 78001"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3688974","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:33Z","timestamp":1729925973000},"page":"11306-11312","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":3,"title":["End-to-end Spatio-Temporal Information Aggregation For Micro-Action Detection"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-3197-8103","authenticated-orcid":false,"given":"Jun","family":"Yu","sequence":"first","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3891-9207","authenticated-orcid":false,"given":"Mohan","family":"Jing","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1019-1477","authenticated-orcid":false,"given":"Guopeng","family":"Zhao","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-8974-3813","authenticated-orcid":false,"given":"Keda","family":"Lu","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-8153-492X","authenticated-orcid":false,"given":"Yifan","family":"Wang","sequence":"additional","affiliation":[{"name":"University of Science and Technology of China, Hefei, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-0323-9573","authenticated-orcid":false,"given":"Feng","family":"Zhao","sequence":"additional","affiliation":[{"name":"Xi'an University of Posts and Telecommunications, Xi an, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0007-3598-8564","authenticated-orcid":false,"given":"Jiaqing","family":"Sun","sequence":"additional","affiliation":[{"name":"Unisound AI Technology Co.,Ltd, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0004-1208-3031","authenticated-orcid":false,"given":"Qingsong","family":"Liu","sequence":"additional","affiliation":[{"name":"Unisound AI Technology Co., Ltd, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0001-8309-1301","authenticated-orcid":false,"given":"Jiaen","family":"Liang","sequence":"additional","affiliation":[{"name":"Unisound AI Technology Co., Ltd, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_1_1_1","first-page":"4","article-title":"Is space-time attention all you need for video understanding?","volume":"2","author":"Bertasius Gedas","year":"2021","unstructured":"Gedas Bertasius, Heng Wang, and Lorenzo Torresani. 2021. Is space-time attention all you need for video understanding?. In ICML, Vol. 2. 4.","journal-title":"ICML"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","unstructured":"Haoyu Chen Xin Liu Xiaobai Li Henglin Shi and Guoying Zhao. 2019. Analyze Spontaneous Gestures for Emotional Stress State Recognition: A Micro-gesture Dataset and Analysis with Deep Learning. In 2019 14th IEEE International Conference on Automatic Face & Gesture Recognition (FG 2019). 1--8. https:\/\/doi.org\/10.1109\/FG.2019.8756513","DOI":"10.1109\/FG.2019.8756513"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01941"},{"key":"e_1_3_2_1_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.53"},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2018.2883743"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2024.3358415"},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3688973"},{"key":"e_1_3_2_1_9_1","volume-title":"International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=0RDcd5Axok","author":"He Junxian","year":"2022","unstructured":"Junxian He, Chunting Zhou, Xuezhe Ma, Taylor Berg-Kirkpatrick, and Graham Neubig. 2022. Towards a Unified View of Parameter-Efficient Transfer Learning. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=0RDcd5Axok"},{"key":"e_1_3_2_1_10_1","volume-title":"Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research","volume":"2799","author":"Houlsby Neil","year":"2019","unstructured":"Neil Houlsby, Andrei Giurgiu, Stanislaw Jastrzebski, Bruna Morrone, Quentin De Laroussilhe, Andrea Gesmundo, Mona Attariyan, and Sylvain Gelly. 2019. Parameter-Efficient Transfer Learning for NLP. In Proceedings of the 36th International Conference on Machine Learning (Proceedings of Machine Learning Research, Vol. 97), Kamalika Chaudhuri and Ruslan Salakhutdinov (Eds.). PMLR, 2790--2799. https:\/\/proceedings.mlr.press\/v97\/houlsby19a.html"},{"key":"e_1_3_2_1_11_1","volume-title":"LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9","author":"Hu Edward J","year":"2022","unstructured":"Edward J Hu, yelong shen, Phillip Wallis, Zeyuan Allen-Zhu, Yuanzhi Li, Shean Wang, Lu Wang, and Weizhu Chen. 2022. LoRA: Low-Rank Adaptation of Large Language Models. In International Conference on Learning Representations. https:\/\/openreview.net\/forum?id=nZeVKeeFYf9"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01968"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01782"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"crossref","unstructured":"Brian Lester Rami Al-Rfou and Noah Constant. 2021. The Power of Scale for Parameter-Efficient Prompt Tuning. arxiv: 2104.08691 [cs.CL] https:\/\/arxiv.org\/abs\/2104.08691","DOI":"10.18653\/v1\/2021.emnlp-main.243"},{"key":"e_1_3_2_1_15_1","volume-title":"MMAD: Multi-label Micro-Action Detection in Videos. arXiv preprint arXiv:2407.05311","author":"Li Kun","year":"2024","unstructured":"Kun Li, Dan Guo, Pengyu Liu, Guoliang Chen, and Meng Wang. 2024. MMAD: Multi-label Micro-Action Detection in Videos. arXiv preprint arXiv:2407.05311 (2024)."},{"key":"e_1_3_2_1_16_1","volume-title":"Videomamba: State space model for efficient video understanding. arXiv preprint arXiv:2403.06977","author":"Li Kunchang","year":"2024","unstructured":"Kunchang Li, Xinhao Li, Yi Wang, Yinan He, Yali Wang, Limin Wang, and Yu Qiao. 2024. Videomamba: State space model for efficient video understanding. arXiv preprint arXiv:2403.06977 (2024)."},{"key":"e_1_3_2_1_17_1","volume-title":"UniFormer: Unified Transformer for Efficient Spatial-Temporal Representation Learning. In International Conference on Learning Representations.","author":"Li Kunchang","year":"2022","unstructured":"Kunchang Li, Yali Wang, Gao Peng, Guanglu Song, Yu Liu, Hongsheng Li, and Yu Qiao. 2022. UniFormer: Unified Transformer for Efficient Spatial-Temporal Representation Learning. In International Conference on Learning Representations."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00399"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01225-0_1"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01759"},{"key":"e_1_3_2_1_22_1","volume-title":"Zhengxiao Du, Zhilin Yang, and Jie Tang.","author":"Liu Xiao","year":"2022","unstructured":"Xiao Liu, Kaixuan Ji, Yicheng Fu, Weng Lam Tam, Zhengxiao Du, Zhilin Yang, and Jie Tang. 2022. P-Tuning v2: Prompt Tuning Can Be Comparable to Fine-tuning Universally Across Scales and Tasks. arxiv: 2110.07602 [cs.CL] https:\/\/arxiv.org\/abs\/2110.07602"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/tip.2022.3195321"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.aiopen.2023.08.012"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_2_1_26_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00269"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01238"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01808"},{"key":"e_1_3_2_1_30_1","volume-title":"Oh (Eds.)","volume":"35","author":"Tan Jing","year":"2022","unstructured":"Jing Tan, Xiaotong Zhao, Xintian Shi, Bin Kang, and Limin Wang. 2022. PointTAD: Multi-Label Temporal Action Detection with Learnable Query Points. In Advances in Neural Information Processing Systems, S. Koyejo, S. Mohamed, A. Agarwal, D. Belgrave, K. Cho, and A. Oh (Eds.), Vol. 35. Curran Associates, Inc., 15268--15280. https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2022\/file\/6255539f776ce988a81d3841eadc4cf9-Paper-Conference.pdf"},{"key":"e_1_3_2_1_31_1","volume-title":"Temporalmaxer: Maximize temporal context with only max pooling for temporal action localization. arXiv preprint arXiv:2303.09055","author":"Tang Tuan N","year":"2023","unstructured":"Tuan N Tang, Kwonyoung Kim, and Kwanghoon Sohn. 2023. Temporalmaxer: Maximize temporal context with only max pooling for temporal action localization. arXiv preprint arXiv:2303.09055 (2023)."},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01398"},{"key":"e_1_3_2_1_33_1","volume-title":"Ahmed Hassan Awadallah, and Jianfeng Gao","author":"Wang Yaqing","year":"2022","unstructured":"Yaqing Wang, Sahaj Agarwal, Subhabrata Mukherjee, Xiaodong Liu, Jing Gao, Ahmed Hassan Awadallah, and Jianfeng Gao. 2022. AdaMix: Mixture-of-Adaptations for Parameter-efficient Model Tuning. arxiv: 2205.12410 [cs.CL] https:\/\/arxiv.org\/abs\/2205.12410"},{"key":"e_1_3_2_1_34_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.288"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"crossref","unstructured":"Elad Ben Zaken Shauli Ravfogel and Yoav Goldberg. 2022. BitFit: Simple Parameter-efficient Fine-tuning for Transformer-based Masked Language-models. arxiv: 2106.10199 [cs.LG] https:\/\/arxiv.org\/abs\/2106.10199","DOI":"10.18653\/v1\/2022.acl-short.1"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_29"},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/178"},{"key":"e_1_3_2_1_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01756"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Melbourne VIC Australia","acronym":"MM '24"},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3688974","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3688974","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:28Z","timestamp":1750295848000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3688974"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":38,"alternative-id":["10.1145\/3664647.3688974","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3688974","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}