{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,7,8]],"date-time":"2026-07-08T16:11:50Z","timestamp":1783527110203,"version":"3.55.0"},"publisher-location":"New York, NY, USA","reference-count":66,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,12,3]],"date-time":"2024-12-03T00:00:00Z","timestamp":1733184000000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.acm.org\/publications\/policies\/copyright_policy#Background"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,12,3]]},"DOI":"10.1145\/3696409.3700204","type":"proceedings-article","created":{"date-parts":[[2024,12,28]],"date-time":"2024-12-28T09:55:23Z","timestamp":1735379723000},"page":"1-8","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":9,"title":["SpikMamba: When SNN meets Mamba in Event-based Human Action Recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0003-1340-9278","authenticated-orcid":false,"given":"Jiaqi","family":"Chen","sequence":"first","affiliation":[{"name":"College Of Information Science and Engineering, Northeastern University, Shenyang, CN"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6246-1748","authenticated-orcid":false,"given":"Yan","family":"Yang","sequence":"additional","affiliation":[{"name":"The Australian National University, Canberra, AU"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6863-8516","authenticated-orcid":false,"given":"Shizhuo","family":"Deng","sequence":"additional","affiliation":[{"name":"Northeastern University, Shenyang, CN"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0009-0006-7996-3716","authenticated-orcid":false,"given":"Da","family":"Teng","sequence":"additional","affiliation":[{"name":"College of Information Science and Engineering of Northeastern University, Shenyang, CN"}],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-9025-173X","authenticated-orcid":false,"given":"Liyuan","family":"Pan","sequence":"additional","affiliation":[{"name":"Beijing Institute of Technology, Beijing, CN"}],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"320","published-online":{"date-parts":[[2024,12,28]]},"reference":[{"key":"e_1_3_3_1_2_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.781"},{"key":"e_1_3_3_1_3_2","unstructured":"Tong Bu Wei Fang Jianhao Ding PengLin Dai Zhaofei Yu and Tiejun Huang. 2023. Optimal ANN-SNN conversion for high-accuracy and ultra-low-latency spiking neural networks. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.04347 (2023)."},{"key":"e_1_3_3_1_4_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2019.00217"},{"key":"e_1_3_3_1_5_2","doi-asserted-by":"crossref","unstructured":"Yongqiang Cao Yang Chen and Deepak Khosla. 2015. Spiking deep convolutional neural networks for energy-efficient object recognition. International Journal of Computer Vision 113 (2015) 54\u201366.","DOI":"10.1007\/s11263-014-0788-3"},{"key":"e_1_3_3_1_6_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_3_1_7_2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS55552.2023.10341740"},{"key":"e_1_3_3_1_8_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00266"},{"key":"e_1_3_3_1_9_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"e_1_3_3_1_10_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00630"},{"key":"e_1_3_3_1_11_2","unstructured":"Daniel\u00a0Y Fu Tri Dao Khaled\u00a0K Saab Armin\u00a0W Thomas Atri Rudra and Christopher R\u00e9. 2022. Hungry hungry hippos: Towards language modeling with state space models. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2212.14052 (2022)."},{"key":"e_1_3_3_1_12_2","doi-asserted-by":"crossref","unstructured":"Yue Gao Jiaxuan Lu Siqi Li Nan Ma Shaoyi Du Yipeng Li and Qionghai Dai. 2023. Action recognition and benchmark using event cameras. IEEE Transactions on Pattern Analysis and Machine Intelligence (2023).","DOI":"10.1109\/TPAMI.2023.3300741"},{"key":"e_1_3_3_1_13_2","doi-asserted-by":"publisher","DOI":"10.1109\/IJCNN48605.2020.9206681"},{"key":"e_1_3_3_1_14_2","unstructured":"Albert Gu and Tri Dao. 2023. Mamba: Linear-time sequence modeling with selective state spaces. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2312.00752 (2023)."},{"key":"e_1_3_3_1_15_2","unstructured":"Albert Gu Karan Goel and Christopher R\u00e9. 2021. Efficiently modeling long sequences with structured state spaces. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2111.00396 (2021)."},{"key":"e_1_3_3_1_16_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2019\/189"},{"key":"e_1_3_3_1_17_2","unstructured":"Dongchen Han Ziyi Wang Zhuofan Xia Yizeng Han Yifan Pu Chunjiang Ge Jun Song Shiji Song Bo Zheng and Gao Huang. 2024. Demystify Mamba in Vision: A Linear Attention Perspective. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2405.16605 (2024)."},{"key":"e_1_3_3_1_18_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"e_1_3_3_1_19_2","unstructured":"Eric Hunsberger and Chris Eliasmith. 2015. Spiking deep networks with LIF neurons. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1510.08829 (2015)."},{"key":"e_1_3_3_1_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICPR48806.2021.9412991"},{"key":"e_1_3_3_1_21_2","unstructured":"Jacques Kaiser Alexander Friedrich J Tieck Daniel Reichard Arne Roennau Emre Neftci and R\u00fcdiger Dillmann. 2019. Embodied neuromorphic vision with event-driven random backpropagation. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1904.04805 (2019)."},{"key":"e_1_3_3_1_22_2","doi-asserted-by":"crossref","unstructured":"Jacques Kaiser Hesham Mostafa and Emre Neftci. 2020. Synaptic plasticity dynamics for deep continuous local learning (DECOLLE). Frontiers in Neuroscience 14 (2020) 424.","DOI":"10.3389\/fnins.2020.00424"},{"key":"e_1_3_3_1_23_2","unstructured":"Will Kay Joao Carreira Karen Simonyan Brian Zhang Chloe Hillier Sudheendra Vijayanarasimhan Fabio Viola Tim Green Trevor Back Paul Natsev et\u00a0al. 2017. The kinetics human action video dataset. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1705.06950 (2017)."},{"key":"e_1_3_3_1_24_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00215"},{"key":"e_1_3_3_1_25_2","doi-asserted-by":"crossref","unstructured":"Yu Kong and Yun Fu. 2022. Human action recognition and prediction: A survey. International Journal of Computer Vision 130 5 (2022) 1366\u20131401.","DOI":"10.1007\/s11263-022-01594-9"},{"key":"e_1_3_3_1_26_2","doi-asserted-by":"crossref","unstructured":"Chankyu Lee Priyadarshini Panda Gopalakrishnan Srinivasan and Kaushik Roy. 2018. Training deep spiking convolutional neural networks with STDP-based unsupervised pre-training followed by supervised fine-tuning. Frontiers in neuroscience 12 (2018) 435.","DOI":"10.3389\/fnins.2018.00435"},{"key":"e_1_3_3_1_27_2","doi-asserted-by":"crossref","unstructured":"Athanasios Lentzas and Dimitris Vrakas. 2020. Non-intrusive human activity recognition and abnormal behavior detection on elderly people: A review. Artificial Intelligence Review 53 3 (2020) 1975\u20132021.","DOI":"10.1007\/s10462-019-09724-5"},{"key":"e_1_3_3_1_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2019.00718"},{"key":"e_1_3_3_1_29_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICME57554.2024.10688372"},{"key":"e_1_3_3_1_30_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2021\/240"},{"key":"e_1_3_3_1_31_2","doi-asserted-by":"publisher","DOI":"10.1109\/WACV56688.2023.00254"},{"key":"e_1_3_3_1_32_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00320"},{"key":"e_1_3_3_1_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.01345"},{"key":"e_1_3_3_1_34_2","unstructured":"Ilya Loshchilov and Frank Hutter. 2016. Sgdr: Stochastic gradient descent with warm restarts. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1608.03983 (2016)."},{"key":"e_1_3_3_1_35_2","doi-asserted-by":"crossref","unstructured":"Jean-Matthieu Maro Sio-Hoi Ieng and Ryad Benosman. 2020. Event-based gesture recognition with dynamic background suppression using smartphone computational capabilities. Frontiers in neuroscience 14 (2020) 275.","DOI":"10.3389\/fnins.2020.00275"},{"key":"e_1_3_3_1_36_2","doi-asserted-by":"crossref","unstructured":"Christoforos Mavrogiannis Francesca Baldini Allan Wang Dapeng Zhao Pete Trautman Aaron Steinfeld and Jean Oh. 2023. Core challenges of social robot navigation: A survey. ACM Transactions on Human-Robot Interaction 12 3 (2023) 1\u201339.","DOI":"10.1145\/3583741"},{"key":"e_1_3_3_1_37_2","unstructured":"Harsh Mehta Ankit Gupta Ashok Cutkosky and Behnam Neyshabur. 2022. Long range language modeling via gated state spaces. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2206.13947 (2022)."},{"key":"e_1_3_3_1_38_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01212"},{"key":"e_1_3_3_1_39_2","doi-asserted-by":"crossref","unstructured":"Shu Miao Guang Chen Xiangyu Ning Yang Zi Kejia Ren Zhenshan Bing and Alois Knoll. 2019. Neuromorphic vision datasets for pedestrian detection action recognition and fall detection. Frontiers in neurorobotics 13 (2019) 38.","DOI":"10.3389\/fnbot.2019.00038"},{"key":"e_1_3_3_1_40_2","doi-asserted-by":"publisher","unstructured":"Liyuan Pan Richard Hartley Cedric Scheerlinck Miaomiao Liu Xin Yu and Yuchao Dai. 2022. High Frame Rate Video Reconstruction Based on an Event Camera. IEEE Transactions on Pattern Analysis and Machine Intelligence 44 5 (2022) 2519\u20132533. 10.1109\/TPAMI.2020.3036667","DOI":"10.1109\/TPAMI.2020.3036667"},{"key":"e_1_3_3_1_41_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00698"},{"key":"e_1_3_3_1_42_2","doi-asserted-by":"crossref","unstructured":"Preksha Pareek and Ankit Thakkar. 2021. A survey on video-based human action recognition: recent updates datasets challenges and applications. Artificial Intelligence Review 54 3 (2021) 2259\u20132322.","DOI":"10.1007\/s10462-020-09904-8"},{"key":"e_1_3_3_1_43_2","doi-asserted-by":"publisher","DOI":"10.1109\/ISCAS.2019.8702581"},{"key":"e_1_3_3_1_44_2","unstructured":"Jiahao Qin and Feng Liu. 2024. Mamba-Spike: Enhancing the Mamba Architecture with a Spiking Front-End for Efficient Temporal Data Processing. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.11823 (2024)."},{"key":"e_1_3_3_1_45_2","first-page":"8748","volume-title":"International conference on machine learning","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong\u00a0Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et\u00a0al. 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PMLR, 8748\u20138763."},{"key":"e_1_3_3_1_46_2","doi-asserted-by":"publisher","DOI":"10.4018\/978-1-5225-0983-7.ch054"},{"key":"e_1_3_3_1_47_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW56347.2022.00301"},{"key":"e_1_3_3_1_48_2","unstructured":"Sumit\u00a0B Shrestha and Garrick Orchard. 2018. Slayer: Spike layer error reassignment in time. Advances in neural information processing systems 31 (2018)."},{"key":"e_1_3_3_1_49_2","doi-asserted-by":"crossref","unstructured":"Christopher Slobogin. 2002. Public privacy: camera surveillance of public places and the right to anonymity. Miss. lJ 72 (2002) 213.","DOI":"10.2139\/ssrn.364600"},{"key":"e_1_3_3_1_50_2","unstructured":"Jimmy\u00a0TH Smith Andrew Warrington and Scott\u00a0W Linderman. 2022. Simplified state space layers for sequence modeling. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2208.04933 (2022)."},{"key":"e_1_3_3_1_51_2","doi-asserted-by":"crossref","unstructured":"Nicholas Soures and Dhireesha Kudithipudi. 2019. Deep liquid state machines with neural plasticity for video activity recognition. Frontiers in neuroscience 13 (2019) 686.","DOI":"10.3389\/fnins.2019.00686"},{"key":"e_1_3_3_1_52_2","unstructured":"Zehua Sun Qiuhong Ke Hossein Rahmani Mohammed Bennamoun Gang Wang and Jun Liu. 2022. Human action recognition from various data modalities: A review. IEEE transactions on pattern analysis and machine intelligence 45 3 (2022) 3200\u20133225."},{"key":"e_1_3_3_1_53_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2015.510"},{"key":"e_1_3_3_1_54_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00675"},{"key":"e_1_3_3_1_55_2","unstructured":"Xiao Wang Shiao Wang Xixi Wang Zhicheng Zhao Lin Zhu Bo Jiang et\u00a0al. 2024. MambaEVT: Event Stream based Visual Object Tracking using State Space Model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2408.10487 (2024)."},{"key":"e_1_3_3_1_56_2","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v38i6.28372"},{"key":"e_1_3_3_1_57_2","doi-asserted-by":"publisher","DOI":"10.24963\/ijcai.2022\/347"},{"key":"e_1_3_3_1_58_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00169"},{"key":"e_1_3_3_1_59_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.01301"},{"key":"e_1_3_3_1_60_2","doi-asserted-by":"crossref","unstructured":"Yujie Wu Lei Deng Guoqi Li Jun Zhu and Luping Shi. 2018. Spatio-temporal backpropagation for training high-performance spiking neural networks. Frontiers in neuroscience 12 (2018) 331.","DOI":"10.3389\/fnins.2018.00331"},{"key":"e_1_3_3_1_61_2","doi-asserted-by":"crossref","unstructured":"Rong Xiao Huajin Tang Yuhao Ma Rui Yan and Garrick Orchard. 2019. An event-driven categorization model for AER image sensors using multispike encoding and learning. IEEE transactions on neural networks and learning systems 31 9 (2019) 3649\u20133657.","DOI":"10.1109\/TNNLS.2019.2945630"},{"key":"e_1_3_3_1_62_2","doi-asserted-by":"crossref","unstructured":"Bochen Xie Yongjian Deng Zhanpeng Shao Hai Liu and Youfu Li. 2022. Vmv-gcn: Volumetric multi-view based graph cnn for event stream classification. IEEE Robotics and Automation Letters 7 2 (2022) 1976\u20131983.","DOI":"10.1109\/LRA.2022.3140819"},{"key":"e_1_3_3_1_63_2","unstructured":"Bochen Xie Yongjian Deng Zhanpeng Shao Hai Liu Qingsong Xu and Youfu Li. 2023. Event voxel set transformer for spatiotemporal representation learning on event streams. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2303.03856 (2023)."},{"key":"e_1_3_3_1_64_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00982"},{"key":"e_1_3_3_1_65_2","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-72775-7_17"},{"key":"e_1_3_3_1_66_2","doi-asserted-by":"publisher","DOI":"10.1109\/IROS58592.2024.10801824"},{"key":"e_1_3_3_1_67_2","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.01763"}],"event":{"name":"MMAsia '24: ACM Multimedia Asia","location":"Auckland New Zealand","acronym":"MMAsia '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 6th ACM International Conference on Multimedia in Asia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696409.3700204","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3696409.3700204","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:10:15Z","timestamp":1750295415000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3696409.3700204"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,12,3]]},"references-count":66,"alternative-id":["10.1145\/3696409.3700204","10.1145\/3696409"],"URL":"https:\/\/doi.org\/10.1145\/3696409.3700204","relation":{},"subject":[],"published":{"date-parts":[[2024,12,3]]},"assertion":[{"value":"2024-12-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}