{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,7,16]],"date-time":"2025-07-16T13:02:52Z","timestamp":1752670972449,"version":"3.41.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","license":[{"start":{"date-parts":[[2023,6,12]],"date-time":"2023-06-12T00:00:00Z","timestamp":1686528000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2023,6,12]]},"DOI":"10.1145\/3591106.3592233","type":"proceedings-article","created":{"date-parts":[[2023,6,8]],"date-time":"2023-06-08T22:33:38Z","timestamp":1686263618000},"page":"262-271","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":1,"title":["Less is More: Decoupled High-Semantic Encoding for Action Recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8441-8679","authenticated-orcid":false,"given":"Chun","family":"Zhang","sequence":"first","affiliation":[{"name":"Faculty of Information Technology, Beijing University of Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-7332-2882","authenticated-orcid":false,"given":"Keyan","family":"Ren","sequence":"additional","affiliation":[{"name":"Faculty of Information Technology, Beijing University of Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-3334-9508","authenticated-orcid":false,"given":"Qingyun","family":"Bian","sequence":"additional","affiliation":[{"name":"Faculty of Information Technology, Beijing University of Technology, China"}]},{"ORCID":"https:\/\/orcid.org\/0009-0008-8088-5510","authenticated-orcid":false,"given":"Yu","family":"Shi","sequence":"additional","affiliation":[{"name":"Faculty of Information Technology, Beijing University of Technology, 
China"}]}],"member":"320","published-online":{"date-parts":[[2023,6,12]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00676"},{"key":"e_1_3_2_1_2_1","volume-title":"A Long Short-Term Memory Recurrent Neural Network Framework for Network Traffic Matrix Prediction. CoRR abs\/1705.05690","author":"Azzouni Abdelhadi","year":"2017","unstructured":"Abdelhadi Azzouni and Guy Pujolle. 2017. A Long Short-Term Memory Recurrent Neural Network Framework for Network Traffic Matrix Prediction. CoRR abs\/1705.05690 (2017). arXiv:1705.05690http:\/\/arxiv.org\/abs\/1705.05690"},{"key":"e_1_3_2_1_3_1","volume-title":"BEiT: BERT Pre-Training of Image Transformers. In The Tenth International Conference on Learning Representations, ICLR 2022","author":"Bao Hangbo","year":"2022","unstructured":"Hangbo Bao, Li Dong, Songhao Piao, and Furu Wei. 2022. BEiT: BERT Pre-Training of Image Transformers. In The Tenth International Conference on Learning Representations, ICLR 2022, Virtual Event, April 25-29, 2022. OpenReview.net. https:\/\/openreview.net\/forum?id=p-BhZSz59o4"},{"key":"e_1_3_2_1_4_1","volume-title":"Proceedings of the International Conference on Machine Learning (ICML).","author":"Bertasius Gedas","year":"2021","unstructured":"Gedas Bertasius, Heng Wang, and Lorenzo Torresani. 2021. Is Space-Time Attention All You Need for Video Understanding?. In Proceedings of the International Conference on Machine Learning (ICML)."},{"key":"e_1_3_2_1_5_1","volume-title":"End-to-End Object Detection with Transformers. CoRR abs\/2005.12872","author":"Carion Nicolas","year":"2020","unstructured":"Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, and Sergey Zagoruyko. 2020. End-to-End Object Detection with Transformers. CoRR abs\/2005.12872 (2020). arXiv:2005.12872https:\/\/arxiv.org\/abs\/2005.12872"},{"key":"e_1_3_2_1_6_1","volume-title":"A Short Note about Kinetics-600. 
CoRR abs\/1808.01340","author":"Carreira Jo\u00e3o","year":"2018","unstructured":"Jo\u00e3o Carreira, Eric Noland, Andras Banki-Horvath, Chloe Hillier, and Andrew Zisserman. 2018. A Short Note about Kinetics-600. CoRR abs\/1808.01340 (2018). arXiv:1808.01340http:\/\/arxiv.org\/abs\/1808.01340"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_8_1","unstructured":"MMAction2 Contributors. 2020. OpenMMLab\u2019s Next Generation Video Understanding Toolbox and Benchmark. https:\/\/github.com\/open-mmlab\/mmaction2."},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00675"},{"key":"e_1_3_2_1_11_1","volume-title":"X3D: Expanding Architectures for Efficient Video Recognition. CoRR abs\/2004.04730","author":"Feichtenhofer Christoph","year":"2020","unstructured":"Christoph Feichtenhofer. 2020. X3D: Expanding Architectures for Efficient Video Recognition. CoRR abs\/2004.04730 (2020). arXiv:2004.04730https:\/\/arxiv.org\/abs\/2004.04730"},{"key":"e_1_3_2_1_12_1","doi-asserted-by":"publisher","DOI":"10.48550\/arXiv.2205.09113"},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"e_1_3_2_1_14_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.622"},{"key":"e_1_3_2_1_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01553"},{"key":"e_1_3_2_1_16_1","volume-title":"The Kinetics Human Action Video Dataset. CoRR abs\/1705.06950","author":"Kay Will","year":"2017","unstructured":"Will Kay, Jo\u00e3o Carreira, Karen Simonyan, Brian Zhang, Chloe Hillier, Sudheendra Vijayanarasimhan, Fabio Viola, Tim Green, Trevor Back, Paul Natsev, Mustafa Suleyman, and Andrew Zisserman. 2017. The Kinetics Human Action Video Dataset. CoRR abs\/1705.06950 (2017). 
arXiv:1705.06950http:\/\/arxiv.org\/abs\/1705.06950"},{"key":"e_1_3_2_1_17_1","unstructured":"Alexander Kolesnikov Alexey Dosovitskiy Dirk Weissenborn Georg Heigold Jakob Uszkoreit Lucas Beyer Matthias Minderer Mostafa Dehghani Neil Houlsby Sylvain Gelly Thomas Unterthiner and Xiaohua Zhai. 2021. An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale."},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.83"},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.113"},{"key":"e_1_3_2_1_20_1","volume-title":"Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. CoRR abs\/2103.14030","author":"Liu Ze","year":"2021","unstructured":"Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin, and Baining Guo. 2021. Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. CoRR abs\/2103.14030 (2021). arXiv:2103.14030https:\/\/arxiv.org\/abs\/2103.14030"},{"key":"e_1_3_2_1_21_1","volume-title":"Video Swin Transformer. CoRR abs\/2106.13230","author":"Liu Ze","year":"2021","unstructured":"Ze Liu, Jia Ning, Yue Cao, Yixuan Wei, Zheng Zhang, Stephen Lin, and Han Hu. 2021. Video Swin Transformer. CoRR abs\/2106.13230 (2021). arXiv:2106.13230https:\/\/arxiv.org\/abs\/2106.13230"},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19772-7_1"},{"key":"e_1_3_2_1_23_1","volume-title":"High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019","author":"Paszke Adam","year":"2019","unstructured":"Adam Paszke, Sam Gross, Francisco Massa, Adam Lerer, James Bradbury, Gregory Chanan, Trevor Killeen, Zeming Lin, Natalia Gimelshein, Luca Antiga, Alban Desmaison, Andreas K\u00f6pf, Edward\u00a0Z. Yang, Zachary DeVito, Martin Raison, Alykhan Tejani, Sasank Chilamkurthy, Benoit Steiner, Lu Fang, Junjie Bai, and Soumith Chintala. 2019. 
PyTorch: An Imperative Style, High-Performance Deep Learning Library. In Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems 2019, NeurIPS 2019, December 8-14, 2019, Vancouver, BC, Canada, Hanna\u00a0M. Wallach, Hugo Larochelle, Alina Beygelzimer, Florence d\u2019Alch\u00e9-Buc, Emily\u00a0B. Fox, and Roman Garnett (Eds.). 8024\u20138035. https:\/\/proceedings.neurips.cc\/paper\/2019\/hash\/bdbca288fee7f92f2bfa9f7012727740-Abstract.html"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.590"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.3390\/data5040104"},{"key":"e_1_3_2_1_26_1","volume-title":"Stochastic Gradient Descent for Non-smooth Optimization: Convergence Results and Optimal Averaging Schemes. CoRR abs\/1212.1824","author":"Shamir Ohad","year":"2012","unstructured":"Ohad Shamir and Tong Zhang. 2012. Stochastic Gradient Descent for Non-smooth Optimization: Convergence Results and Optimal Averaging Schemes. CoRR abs\/1212.1824 (2012). arXiv:1212.1824http:\/\/arxiv.org\/abs\/1212.1824"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.3390\/app12094165"},{"key":"e_1_3_2_1_28_1","volume-title":"C3D: Generic Features for Video Analysis. CoRR abs\/1412.0767","author":"Tran Du","year":"2014","unstructured":"Du Tran, Lubomir\u00a0D. Bourdev, Rob Fergus, Lorenzo Torresani, and Manohar Paluri. 2014. C3D: Generic Features for Video Analysis. CoRR abs\/1412.0767 (2014). arXiv:1412.0767http:\/\/arxiv.org\/abs\/1412.0767"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"e_1_3_2_1_30_1","volume-title":"Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. 
Curran Associates","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan\u00a0N Gomez, \u0141\u00a0ukasz Kaiser, and Illia Polosukhin. 2017. Attention is All you Need. In Advances in Neural Information Processing Systems, I.\u00a0Guyon, U.\u00a0Von Luxburg, S.\u00a0Bengio, H.\u00a0Wallach, R.\u00a0Fergus, S.\u00a0Vishwanathan, and R.\u00a0Garnett (Eds.). Vol.\u00a030. Curran Associates, Inc.https:\/\/proceedings.neurips.cc\/paper\/2017\/file\/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf"},{"key":"e_1_3_2_1_31_1","volume-title":"ActionCLIP: A New Paradigm for Video Action Recognition. CoRR abs\/2109.08472","author":"Wang Mengmeng","year":"2021","unstructured":"Mengmeng Wang, Jiazheng Xing, and Yong Liu. 2021. ActionCLIP: A New Paradigm for Video Action Recognition. CoRR abs\/2109.08472 (2021). arXiv:2109.08472https:\/\/arxiv.org\/abs\/2109.08472"},{"key":"e_1_3_2_1_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00333"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.3390\/s20082226"},{"key":"e_1_3_2_1_34_1","volume-title":"British Machine Vision Conference 2018, BMVC 2018","author":"Zhang Da","year":"2018","unstructured":"Da Zhang, Xiyang Dai, Xin Wang, and Yuan-Fang Wang. 2018. S3D: Single Shot multi-Span Detector via Fully 3D Convolutional Networks. In British Machine Vision Conference 2018, BMVC 2018, Newcastle, UK, September 3-6, 2018. BMVA Press, 293. http:\/\/bmvc2018.org\/contents\/papers\/0141.pdf"},{"key":"e_1_3_2_1_35_1","volume-title":"iBOT: Image BERT Pre-Training with Online Tokenizer. CoRR abs\/2111.07832","author":"Zhou Jinghao","year":"2021","unstructured":"Jinghao Zhou, Chen Wei, Huiyu Wang, Wei Shen, Cihang Xie, Alan\u00a0L. Yuille, and Tao Kong. 2021. iBOT: Image BERT Pre-Training with Online Tokenizer. CoRR abs\/2111.07832 (2021). 
arXiv:2111.07832https:\/\/arxiv.org\/abs\/2111.07832"},{"key":"e_1_3_2_1_36_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v32i1.12342"},{"key":"e_1_3_2_1_37_1","volume-title":"Deformable DETR: Deformable Transformers for End-to-End Object Detection. CoRR abs\/2010.04159","author":"Zhu Xizhou","year":"2020","unstructured":"Xizhou Zhu, Weijie Su, Lewei Lu, Bin Li, Xiaogang Wang, and Jifeng Dai. 2020. Deformable DETR: Deformable Transformers for End-to-End Object Detection. CoRR abs\/2010.04159 (2020). arXiv:2010.04159https:\/\/arxiv.org\/abs\/2010.04159"}],"event":{"name":"ICMR '23: International Conference on Multimedia Retrieval","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Thessaloniki Greece","acronym":"ICMR '23"},"container-title":["Proceedings of the 2023 ACM International Conference on Multimedia Retrieval"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592233","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3591106.3592233","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,17]],"date-time":"2025-06-17T17:51:22Z","timestamp":1750182682000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3591106.3592233"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,6,12]]},"references-count":37,"alternative-id":["10.1145\/3591106.3592233","10.1145\/3591106"],"URL":"https:\/\/doi.org\/10.1145\/3591106.3592233","relation":{},"subject":[],"published":{"date-parts":[[2023,6,12]]},"assertion":[{"value":"2023-06-12","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}