{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:16:11Z","timestamp":1765340171377,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":37,"publisher":"ACM","funder":[{"name":"National Natural and Science Foundation of China","award":["No.62350083, No.62202370, and No.62442604"],"award-info":[{"award-number":["No.62350083, No.62202370, and No.62442604"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755847","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T07:38:54Z","timestamp":1761377934000},"page":"787-795","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["BiOMamba: Mamba-based Forward-Then-Backward Temporal Modeling for Online Action Detection and Anticipation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-4394-5723","authenticated-orcid":false,"given":"Sensen","family":"Wang","sequence":"first","affiliation":[{"name":"Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-1048-5115","authenticated-orcid":false,"given":"Yuehu","family":"Liu","sequence":"additional","affiliation":[{"name":"Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, China"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-9604-2800","authenticated-orcid":false,"given":"Chi","family":"Zhang","sequence":"additional","affiliation":[{"name":"Institute of Artificial Intelligence and Robotics, Xi'an Jiaotong University, Xi'an, China"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.neuron.2016.07.047"},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00949"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01930"},{"key":"e_1_3_2_1_5_1","unstructured":"MMAction2 Contributors. 2020. OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark. https:\/\/github.com\/open-mmlab\/mmaction2."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_17"},{"key":"e_1_3_2_1_7_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46454-1_17"},{"key":"e_1_3_2_1_8_1","article-title":"Visualizing data using t-SNE","volume":"9","author":"der Maaten Laurens Van","year":"2008","unstructured":"Van der Maaten Laurens and Hinton Geoffrey. 2008. Visualizing data using t-SNE. Journal of machine learning research, Vol. 9, 11 (2008).","journal-title":"Journal of machine learning research"},{"key":"e_1_3_2_1_9_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00089"},{"key":"e_1_3_2_1_10_1","volume-title":"Red: Reinforced encoder-decoder networks for action anticipation. arXiv preprint arXiv:1707.04818","author":"Gao Jiyang","year":"2017","unstructured":"Jiyang Gao, Zhenheng Yang, and Ram Nevatia. 2017. Red: Reinforced encoder-decoder networks for action anticipation. arXiv preprint arXiv:1707.04818 (2017)."},{"key":"e_1_3_2_1_11_1","doi-asserted-by":"publisher","DOI":"10.1145\/3240508.3240548"},{"key":"e_1_3_2_1_12_1","volume-title":"Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752","author":"Gu Albert","year":"2023","unstructured":"Albert Gu and Tri Dao. 2023. Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:2312.00752 (2023)."},{"key":"e_1_3_2_1_13_1","doi-asserted-by":"publisher","DOI":"10.1145\/3123266.3123394"},{"key":"e_1_3_2_1_14_1","volume-title":"Activitynet: A large-scale video benchmark for human activity understanding. 961-970 pages.","author":"Heilbron Fabian Caba","year":"2015","unstructured":"Fabian Caba Heilbron, Victor Escorcia, Bernard Ghanem, and Juan Carlos Niebles. 2015. Activitynet: A large-scale video benchmark for human activity understanding. 961-970 pages."},{"key":"e_1_3_2_1_15_1","unstructured":"Y.-G. Jiang J. Liu A. Roshan Zamir G. Toderici I. Laptev M. Shah and R. Sukthankar. 2014. THUMOS challenge: Action recognition with a large number of classes."},{"key":"e_1_3_2_1_16_1","volume-title":"Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980","author":"Kingma Diederik P","year":"2014","unstructured":"Diederik P Kingma and Jimmy Ba. 2014. Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)."},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1145\/3664647.3681456"},{"key":"e_1_3_2_1_18_1","doi-asserted-by":"publisher","DOI":"10.1145\/2964284.2967279"},{"key":"e_1_3_2_1_19_1","volume-title":"A critical review of recurrent neural networks for sequence learning. arXiv preprint arXiv:1506.00019","author":"Lipton Zachary C","year":"2015","unstructured":"Zachary C Lipton, John Berkowitz, and Charles Elkan. 2015. A critical review of recurrent neural networks for sequence learning. arXiv preprint arXiv:1506.00019 (2015)."},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2023.3313258"},{"key":"e_1_3_2_1_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52734.2025.00813"},{"key":"e_1_3_2_1_22_1","volume-title":"International conference on machine learning. Pmlr, 1310-1318","author":"Pascanu Razvan","year":"2013","unstructured":"Razvan Pascanu, Tomas Mikolov, and Yoshua Bengio. 2013. On the difficulty of training recurrent neural networks. In International conference on machine learning. Pmlr, 1310-1318."},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.39"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1126\/science.aaw5181"},{"key":"e_1_3_2_1_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/78.650093"},{"key":"e_1_3_2_1_26_1","volume-title":"Attention is all you need. Advances in neural information processing systems","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, \u0141ukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01271"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"e_1_3_2_1_29_1","doi-asserted-by":"publisher","DOI":"10.1145\/3719384.3719412"},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2025.111773"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00747"},{"key":"e_1_3_2_1_32_1","first-page":"47908","article-title":"Does Video-Text Pretraining Help Open-Vocabulary Online Action Detection","volume":"37","author":"Wang Yi","year":"2024","unstructured":"Yi Wang, Jilan Xu, Yinan He, Zifan Song, Limin Wang, Yu Qiao, Cairong Zhao, et al., 2024. Does Video-Text Pretraining Help Open-Vocabulary Online Action Detection? Advances in Neural Information Processing Systems, Vol. 37 (2024), 47908-47930.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_33_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00563"},{"key":"e_1_3_2_1_34_1","first-page":"1086","article-title":"Long short-term transformer for online action detection","volume":"34","author":"Xu Mingze","year":"2021","unstructured":"Mingze Xu, Yuanjun Xiong, Hao Chen, Xinyu Li, Wei Xia, Zhuowen Tu, and Stefano Soatto. 2021. Long short-term transformer for online action detection. Advances in Neural Information Processing Systems, Vol. 34 (2021), 1086-1099.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_35_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00316"},{"key":"e_1_3_2_1_36_1","volume-title":"MALT: Multi-scale Action Learning Transformer for Online Action Detection. In 2024 International Joint Conference on Neural Networks (IJCNN). IEEE, 1-8.","author":"Yang Zhipeng","year":"2024","unstructured":"Zhipeng Yang, Ruoyu Wang, Yang Tan, and Liping Xie. 2024. MALT: Multi-scale Action Learning Transformer for Online Action Detection. In 2024 International Joint Conference on Neural Networks (IJCNN). IEEE, 1-8."},{"key":"e_1_3_2_1_37_1","volume-title":"Real-time Online Video Detection with Temporal Smoothing Transformers. In European Conference on Computer Vision. Springer, 485-502","author":"Zhao Yue","year":"2022","unstructured":"Yue Zhao and Philipp Kr\u00e4henb\u00fchl. 2022. Real-time Online Video Detection with Temporal Smoothing Transformers. In European Conference on Computer Vision. Springer, 485-502."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755847","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:14:02Z","timestamp":1765340042000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755847"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":37,"alternative-id":["10.1145\/3746027.3755847","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755847","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}