{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T13:46:43Z","timestamp":1776347203856,"version":"3.51.2"},"reference-count":48,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,11,1]],"date-time":"2026-11-01T00:00:00Z","timestamp":1793491200000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Pattern Recognition"],"published-print":{"date-parts":[[2026,11]]},"DOI":"10.1016\/j.patcog.2026.113672","type":"journal-article","created":{"date-parts":[[2026,4,2]],"date-time":"2026-04-02T13:07:35Z","timestamp":1775135255000},"page":"113672","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"PB","title":["SpikeTAD: Spiking neural networks for end-to-end temporal action detection"],"prefix":"10.1016","volume":"179","author":[{"ORCID":"https:\/\/orcid.org\/0009-0007-8298-8161","authenticated-orcid":false,"given":"Min","family":"Yang","sequence":"first","affiliation":[]},{"given":"Mi","family":"Zhou","sequence":"additional","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-3674-7718","authenticated-orcid":false,"given":"Limin","family":"Wang","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.patcog.2026.113672_b1","series-title":"Computer Vision - ECCV 2022 - 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part IV","first-page":"492","article-title":"ActionFormer: Localizing moments of actions with transformers","volume":"vol. 13664","author":"Zhang","year":"2022"},{"key":"10.1016\/j.patcog.2026.113672_b2","doi-asserted-by":"crossref","first-page":"5427","DOI":"10.1109\/TIP.2022.3195321","article-title":"End-to-end temporal action detection with transformer","volume":"31","author":"Liu","year":"2022","journal-title":"IEEE Trans. Image Process."},{"key":"10.1016\/j.patcog.2026.113672_b3","series-title":"2019 IEEE\/CVF International Conference on Computer Vision, ICCV 2019, Seoul, Korea (South), October 27 - November 2, 2019","first-page":"3888","article-title":"BMN: boundary-matching network for temporal action proposal generation","author":"Lin","year":"2019"},{"key":"10.1016\/j.patcog.2026.113672_b4","doi-asserted-by":"crossref","unstructured":"M. Yang, H. Gao, P. Guo, L. Wang, Adapting short-term transformers for action detection in untrimmed videos, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 18570\u201318579.","DOI":"10.1109\/CVPR52733.2024.01757"},{"key":"10.1016\/j.patcog.2026.113672_b5","series-title":"THUMOS challenge: Action recognition with a large number of classes.","author":"Jiang","year":"2014"},{"key":"10.1016\/j.patcog.2026.113672_b6","doi-asserted-by":"crossref","unstructured":"F. Caba Heilbron, V. Escorcia, B. Ghanem, J. Carlos Niebles, Activitynet: A large-scale video benchmark for human activity understanding, in: Proceedings of the Ieee Conference on Computer Vision and Pattern Recognition, 2015, pp. 961\u2013970.","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"10.1016\/j.patcog.2026.113672_b7","doi-asserted-by":"crossref","unstructured":"X. Chen, Y. Guo, J. Liang, S. Zhuang, R. Zeng, X. Hu, Temporal Action Detection Model Compression by Progressive Block Drop, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR, 2025.","DOI":"10.1109\/CVPR52734.2025.02721"},{"issue":"9","key":"10.1016\/j.patcog.2026.113672_b8","doi-asserted-by":"crossref","first-page":"1659","DOI":"10.1016\/S0893-6080(97)00011-7","article-title":"Networks of spiking neurons: the third generation of neural network models","volume":"10","author":"Maass","year":"1997","journal-title":"Neural Netw."},{"key":"10.1016\/j.patcog.2026.113672_b9","first-page":"21056","article-title":"Deep residual learning in spiking neural networks","volume":"34","author":"Fang","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113672_b10","unstructured":"Z. Zhou, Y. Zhu, C. He, Y. Wang, S. Yan, Y. Tian, L. Yuan, Spikformer: When spiking neural network meets transformer, in: International Conference on Learning Representations, ICLR, 2023."},{"key":"10.1016\/j.patcog.2026.113672_b11","doi-asserted-by":"crossref","unstructured":"S. Liu, C.-L. Zhang, C. Zhao, B. Ghanem, End-to-end temporal action detection with 1b parameters across 1000 frames, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2024, pp. 18591\u201318601.","DOI":"10.1109\/CVPR52733.2024.01759"},{"key":"10.1016\/j.patcog.2026.113672_b12","series-title":"Computer Vision - ECCV 2022 - 17th European Conference, Tel Aviv, Israel, October 23-27, 2022, Proceedings, Part XXXIV","first-page":"503","article-title":"Tallformer: Temporal action localization with a long-memory transformer","volume":"vol. 13694","author":"Cheng","year":"2022"},{"key":"10.1016\/j.patcog.2026.113672_b13","doi-asserted-by":"crossref","unstructured":"C. Zhao, S. Liu, K. Mangalam, B. Ghanem, Re2TAL: Rewiring pretrained video backbones for reversible temporal action localization, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 10637\u201310647.","DOI":"10.1109\/CVPR52729.2023.01025"},{"key":"10.1016\/j.patcog.2026.113672_b14","doi-asserted-by":"crossref","DOI":"10.1016\/j.cviu.2023.103692","article-title":"Basictad: An astounding RGB-only baseline for temporal action detection","volume":"232","author":"Yang","year":"2023","journal-title":"Comput. Vis. Image Underst."},{"key":"10.1016\/j.patcog.2026.113672_b15","series-title":"2017 IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017, Honolulu, HI, USA, July 21-26, 2017","first-page":"4724","article-title":"Quo vadis, action recognition? A new model and the kinetics dataset","author":"Carreira","year":"2017"},{"key":"10.1016\/j.patcog.2026.113672_b16","series-title":"Computer Vision - ECCV 2016 - 14th European Conference, Amsterdam, the Netherlands, October 11-14, 2016, Proceedings, Part VIII","first-page":"20","article-title":"Temporal segment networks: Towards good practices for deep action recognition","volume":"9912","author":"Wang","year":"2016"},{"key":"10.1016\/j.patcog.2026.113672_b17","series-title":"IEEE\/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023, Vancouver, BC, Canada, June 17-24, 2023","first-page":"14549","article-title":"Videomae V2: scaling video masked autoencoders with dual masking","author":"Wang","year":"2023"},{"key":"10.1016\/j.patcog.2026.113672_b18","series-title":"The kinetics human action video dataset","author":"Kay","year":"2017"},{"key":"10.1016\/j.patcog.2026.113672_b19","series-title":"The Twelfth International Conference on Learning Representations, ICLR 2024, Vienna, Austria, May 7-11, 2024","article-title":"InternVid: A large-scale video-text dataset for multimodal understanding and generation","author":"Wang","year":"2024"},{"key":"10.1016\/j.patcog.2026.113672_b20","doi-asserted-by":"crossref","unstructured":"Z. Huang, X. Shi, Z. Hao, T. Bu, J. Ding, Z. Yu, T. Huang, Towards High-performance Spiking Transformers from ANN to SNN Conversion, in: Proceedings of the 32nd ACM International Conference on Multimedia, 2024, pp. 10688\u201310697.","DOI":"10.1145\/3664647.3680620"},{"key":"10.1016\/j.patcog.2026.113672_b21","unstructured":"Z. Huang, W. Fang, T. Bu, P. Xue, Z. Hao, W. Liu, Y. Tang, Z. Yu, T. Huang, Differential Coding for Training-Free ANN-to-SNN Conversion, in: International Conference on Machine Learning, ICML, 2025."},{"key":"10.1016\/j.patcog.2026.113672_b22","unstructured":"T. Bu, W. Fang, J. Ding, P. Dai, Z. Yu, T. Huang, Optimal ANN-SNN conversion for high-accuracy and ultra-low-latency spiking neural networks, in: International Conference on Learning Representations, ICLR, 2022."},{"key":"10.1016\/j.patcog.2026.113672_b23","series-title":"Temporalmaxer: Maximize temporal context with only max pooling for temporal action localization","author":"Tang","year":"2023"},{"key":"10.1016\/j.patcog.2026.113672_b24","series-title":"IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2021, Virtual, June 19-25, 2021","first-page":"3320","article-title":"Learning salient boundary feature for anchor-free temporal action localization","author":"Lin","year":"2021"},{"key":"10.1016\/j.patcog.2026.113672_b25","first-page":"11","article-title":"Optimized potential initialization for low-latency spiking neural networks","volume":"vol. 36","author":"Bu","year":"2022"},{"key":"10.1016\/j.patcog.2026.113672_b26","unstructured":"Z. Hao, J. Ding, T. Bu, T. Huang, Z. Yu, Bridging the gap between anns and snns by calibrating offset spikes, in: International Conference on Learning Representations, ICLR, 2023."},{"key":"10.1016\/j.patcog.2026.113672_b27","series-title":"European Conference on Computer Vision","first-page":"388","article-title":"Deep spiking neural network: Energy efficiency through time based coding","author":"Han","year":"2020"},{"key":"10.1016\/j.patcog.2026.113672_b28","first-page":"11","article-title":"Reducing ann-snn conversion error through residual membrane potential","volume":"vol. 37 no. 1","author":"Hao","year":"2023"},{"issue":"5","key":"10.1016\/j.patcog.2026.113672_b29","doi-asserted-by":"crossref","first-page":"1947","DOI":"10.1109\/TNNLS.2021.3110991","article-title":"Rectified linear postsynaptic potential function for backpropagation in deep spiking neural networks","volume":"33","author":"Zhang","year":"2021","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"10.1016\/j.patcog.2026.113672_b30","first-page":"23426","article-title":"Differentiable spike: Rethinking gradient-descent for training spiking neural networks","volume":"34","author":"Li","year":"2021","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113672_b31","series-title":"European Conference on Computer Vision","first-page":"36","article-title":"Reducing information loss for spiking neural networks","author":"Guo","year":"2022"},{"key":"10.1016\/j.patcog.2026.113672_b32","first-page":"156","article-title":"Im-loss: information maximization loss for spiking neural networks","volume":"35","author":"Guo","year":"2022","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"10.1016\/j.patcog.2026.113672_b33","first-page":"51708","article-title":"Enof-snn: Training accurate spiking neural networks via enhancing the output feature","volume":"37","author":"Guo","year":"2024","journal-title":"Adv. Neural Inf. Process. Syst."},{"issue":"6","key":"10.1016\/j.patcog.2026.113672_b34","doi-asserted-by":"crossref","first-page":"51","DOI":"10.1109\/MSP.2019.2931595","article-title":"Surrogate gradient learning in spiking neural networks: Bringing the power of gradient-based optimization to spiking neural networks","volume":"36","author":"Neftci","year":"2019","journal-title":"IEEE Signal Process. Mag."},{"issue":"1","key":"10.1016\/j.patcog.2026.113672_b35","doi-asserted-by":"crossref","first-page":"25","DOI":"10.1146\/annurev.neuro.31.060407.125639","article-title":"Spike timing\u2013dependent plasticity: a hebbian learning rule","volume":"31","author":"Caporale","year":"2008","journal-title":"Annu. Rev. Neurosci."},{"key":"10.1016\/j.patcog.2026.113672_b36","article-title":"Quantization framework for fast spiking neural networks","volume":"16","author":"Li","year":"2022","journal-title":"Front. Neurosci."},{"key":"10.1016\/j.patcog.2026.113672_b37","series-title":"IEEE International Conference on Computer Vision, ICCV 2017, Venice, Italy, October 22-29, 2017","first-page":"2999","article-title":"Focal loss for dense object detection","author":"Lin","year":"2017"},{"key":"10.1016\/j.patcog.2026.113672_b38","first-page":"12993","article-title":"Distance-iou loss: Faster and better learning for bounding box regression","volume":"vol. 34","author":"Zheng","year":"2020"},{"issue":"6197","key":"10.1016\/j.patcog.2026.113672_b39","doi-asserted-by":"crossref","first-page":"668","DOI":"10.1126\/science.1254642","article-title":"A million spiking-neuron integrated circuit with a scalable communication network and interface","volume":"345","author":"Merolla","year":"2014","journal-title":"Science"},{"key":"10.1016\/j.patcog.2026.113672_b40","series-title":"2014 IEEE International Solid-State Circuits Conference Digest of Technical Papers","first-page":"10","article-title":"1.1 Computing\u2019s energy problem (and what we can do about it)","author":"Horowitz","year":"2014"},{"key":"10.1016\/j.patcog.2026.113672_b41","series-title":"Training full spike neural networks via auxiliary accumulation pathway","author":"Chen","year":"2023"},{"key":"10.1016\/j.patcog.2026.113672_b42","series-title":"7th International Conference on Learning Representations, ICLR 2019, New Orleans, la, USA, May 6-9, 2019","article-title":"Decoupled weight decay regularization","author":"Loshchilov","year":"2019"},{"key":"10.1016\/j.patcog.2026.113672_b43","series-title":"Proceedings of the IEEE\/CVF International Conference on Computer Vision","first-page":"6202","article-title":"Slowfast networks for video recognition","author":"Feichtenhofer","year":"2019"},{"key":"10.1016\/j.patcog.2026.113672_b44","doi-asserted-by":"crossref","unstructured":"D. Shi, Y. Zhong, Q. Cao, L. Ma, J. Li, D. Tao, TriDet: Temporal action detection with relative boundary modeling, in: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, 2023, pp. 18857\u201318866.","DOI":"10.1109\/CVPR52729.2023.01808"},{"key":"10.1016\/j.patcog.2026.113672_b45","series-title":"Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition","first-page":"3202","article-title":"Video swin transformer","author":"Liu","year":"2022"},{"key":"10.1016\/j.patcog.2026.113672_b46","series-title":"European Conference on Computer Vision","first-page":"253","article-title":"Integer-valued training and spike-driven inference spiking neural network for high-performance and energy-efficient object detection","author":"Luo","year":"2024"},{"key":"10.1016\/j.patcog.2026.113672_b47","first-page":"11270","article-title":"Spiking-YOLO: spiking neural network for energy-efficient object detection","volume":"vol. 34","author":"Kim","year":"2020"},{"key":"10.1016\/j.patcog.2026.113672_b48","doi-asserted-by":"crossref","unstructured":"H. Alwassel, F.C. Heilbron, V. Escorcia, B. Ghanem, Diagnosing error in temporal action detectors, in: Proceedings of the European Conference on Computer Vision, ECCV, 2018, pp. 256\u2013272.","DOI":"10.1007\/978-3-030-01219-9_16"}],"container-title":["Pattern Recognition"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326006370?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0031320326006370?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,4,16]],"date-time":"2026-04-16T12:54:23Z","timestamp":1776344063000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0031320326006370"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,11]]},"references-count":48,"alternative-id":["S0031320326006370"],"URL":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113672","relation":{},"ISSN":["0031-3203"],"issn-type":[{"value":"0031-3203","type":"print"}],"subject":[],"published":{"date-parts":[[2026,11]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"SpikeTAD: Spiking neural networks for end-to-end temporal action detection","name":"articletitle","label":"Article Title"},{"value":"Pattern Recognition","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.patcog.2026.113672","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"113672"}}