{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T16:50:26Z","timestamp":1772643026883,"version":"3.50.1"},"reference-count":108,"publisher":"Elsevier BV","license":[{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/tdm\/userlicense\/1.0\/"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.elsevier.com\/legal\/tdmrep-license"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-017"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-012"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2026,6,1]],"date-time":"2026-06-01T00:00:00Z","timestamp":1780272000000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-004"}],"funder":[{"DOI":"10.13039\/100012542","name":"Sichuan Province Science and Technology Support Program","doi-asserted-by":"publisher","id":[{"id":"10.13039\/100012542","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["elsevier.com","sciencedirect.com"],"crossmark-restriction":true},"short-container-title":["Neural Networks"],"published-print":{"date-parts":[[2026,6]]},"DOI":"10.1016\/j.neunet.2026.108627","type":"journal-article","created":{"date-parts":[[2026,1,21]],"date-time":"2026-01-21T16:42:44Z","timestamp":1769013764000},"page":"108627","update-policy":"https:\/\/doi.org\/10.1016\/elsevier_cm_policy","source":"Crossref","is-referenced-by-count":0,"special_numbering":"C","title":["Cross-category spatiotemporal consensus and discriminative networks for weakly-supervised temporal action localization"],"prefix":"10.1016","volume":"198","author":[{"ORCID":"https:\/\/orcid.org\/0000-0002-8217-3268","authenticated-orcid":false,"given":"Kunlun","family":"Wu","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8396-5710","authenticated-orcid":false,"given":"Donghai","family":"Zhai","sequence":"additional","affiliation":[]}],"member":"78","reference":[{"key":"10.1016\/j.neunet.2026.108627_bib0001","series-title":"International conference on machine learning","first-page":"475","article-title":"Forecasting sequential data using consistent koopman autoencoders","author":"Azencot","year":"2020"},{"key":"10.1016\/j.neunet.2026.108627_bib0002","series-title":"Computer vision\u2013ECCV 2020: 16th european conference, glasgow, UK, august 23\u201328, 2020, proceedings, part XXVIII 16","first-page":"121","article-title":"Boundary content graph neural network for temporal action proposal generation","author":"Bai","year":"2020"},{"key":"10.1016\/j.neunet.2026.108627_bib0003","series-title":"Data-driven science and engineering: Machine learning, dynamical systems, and control","author":"Brunton","year":"2022"},{"key":"10.1016\/j.neunet.2026.108627_bib0004","series-title":"Proceedings of the ieee conference on computer vision and pattern recognition","first-page":"961","article-title":"Activitynet: A large-scale video benchmark for human activity understanding","author":"Caba Heilbron","year":"2015"},{"key":"10.1016\/j.neunet.2026.108627_bib0005","series-title":"2017\u202fIEEE Conference on computer vision and pattern recognition (CVPR)","first-page":"4724","article-title":"Quo vadis, action recognition? a new model and the kinetics dataset","author":"Carreira","year":"2017"},{"key":"10.1016\/j.neunet.2026.108627_bib0006","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"6299","article-title":"Quo vadis, action recognition? a new model and the kinetics dataset","author":"Carreira","year":"2017"},{"key":"10.1016\/j.neunet.2026.108627_bib0007","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"1130","article-title":"Rethinking the faster R-cnn architecture for temporal action localization","author":"Chao","year":"2018"},{"issue":"12","key":"10.1016\/j.neunet.2026.108627_bib0008","doi-asserted-by":"crossref","first-page":"15896","DOI":"10.1109\/TPAMI.2023.3308571","article-title":"Uncertainty-aware dual-evidential learning for weakly-supervised temporal action localization","volume":"45","author":"Chen","year":"2023","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.neunet.2026.108627_bib0009","series-title":"European conference on computer vision","first-page":"192","article-title":"Dual-evidential learning for weakly-supervised temporal action localization","author":"Chen","year":"2022"},{"key":"10.1016\/j.neunet.2026.108627_bib0010","series-title":"International conference on machine learning","first-page":"1597","article-title":"A simple framework for contrastive learning of visual representations","author":"Chen","year":"2020"},{"key":"10.1016\/j.neunet.2026.108627_bib0011","doi-asserted-by":"crossref","first-page":"4349","DOI":"10.1109\/TMM.2022.3174344","article-title":"Multi-dimensional attention with similarity constraint for weakly-supervised temporal action localization","volume":"25","author":"Chen","year":"2022","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2026.108627_bib0012","doi-asserted-by":"crossref","first-page":"4349","DOI":"10.1109\/TMM.2022.3174344","article-title":"Multi-dimensional attention with similarity constraint for weakly-supervised temporal action localization","volume":"25","author":"Chen","year":"2023","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2026.108627_bib0013","doi-asserted-by":"crossref","first-page":"6869","DOI":"10.1109\/TIP.2021.3099407","article-title":"Kfc: An efficient framework for semi-supervised temporal action localization","volume":"30","author":"Ding","year":"2021","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.neunet.2026.108627_bib0014","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.107115","article-title":"Ssim over mse: A new perspective for video anomaly detection","volume":"185","author":"Fan","year":"2025","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108627_bib0015","doi-asserted-by":"crossref","first-page":"19967","DOI":"10.1109\/CVPR52688.2022.01937","article-title":"Fine-grained temporal contrastive learning for weakly-supervised temporal action localization","author":"Gao","year":"2022","journal-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"issue":"12","key":"10.1016\/j.neunet.2026.108627_bib0016","doi-asserted-by":"crossref","first-page":"15949","DOI":"10.1109\/TPAMI.2023.3311447","article-title":"Vectorized evidential learning for weakly-supervised temporal action localization","volume":"45","author":"Gao","year":"2023","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.neunet.2026.108627_bib0017","doi-asserted-by":"crossref","first-page":"15949","DOI":"10.1109\/TPAMI.2023.3311447","article-title":"Vectorized evidential learning for weakly-supervised temporal action localization","volume":"45","author":"Gao","year":"2023","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.neunet.2026.108627_bib0018","doi-asserted-by":"crossref","first-page":"13915","DOI":"10.1109\/CVPR52688.2022.01355","article-title":"Asm-loc: Action-aware segment modeling for weakly-supervised temporal action localization","author":"He","year":"2022","journal-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"10.1016\/j.neunet.2026.108627_bib0019","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"13925","article-title":"Asm-loc: Action-aware segment modeling for weakly-supervised temporal action localization","author":"He","year":"2022"},{"key":"10.1016\/j.neunet.2026.108627_bib0020","doi-asserted-by":"crossref","DOI":"10.1145\/3474085.3475298","article-title":"Cross-modal consensus network for weakly supervised temporal action localization","author":"Hong","year":"2021","journal-title":"Proceedings of the 29th ACM International Conference on Multimedia"},{"key":"10.1016\/j.neunet.2026.108627_bib0021","series-title":"Proceedings of the 29th ACM international conference on multimedia","first-page":"1591","article-title":"Cross-modal consensus network for weakly supervised temporal action localization","author":"Hong","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0022","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"2704","article-title":"Weakly-supervised temporal action localization with multi-modal plateau transformers","author":"Hu","year":"2024"},{"key":"10.1016\/j.neunet.2026.108627_bib0023","doi-asserted-by":"crossref","first-page":"207","DOI":"10.1109\/TCSVT.2023.3283430","article-title":"Learning proposal-aware re-ranking for weakly-supervised temporal action localization","volume":"34","author":"Hu","year":"2023","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.108627_bib0024","article-title":"Prototype-guided and dynamic-aware video anomaly detection","volume":"26","author":"Huang","year":"2025","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108627_bib0025","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"11053","article-title":"Relational prototypical network for weakly supervised temporal action localization","volume":"vol. 34","author":"Huang","year":"2020"},{"key":"10.1016\/j.neunet.2026.108627_bib0026","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"8002","article-title":"Foreground-action consistency network for weakly supervised temporal action localization","author":"Huang","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0027","series-title":"Foreground-action consistency network for weakly supervised temporal action localization","first-page":"7982","author":"Huang","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0028","doi-asserted-by":"crossref","first-page":"3262","DOI":"10.1109\/CVPR52688.2022.00327","article-title":"Weakly supervised temporal action localization via representative snippet knowledge propagation","author":"Huang","year":"2022","journal-title":"2022 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"10.1016\/j.neunet.2026.108627_bib0029","doi-asserted-by":"crossref","DOI":"10.1016\/j.psep.2025.108113","article-title":"Large-scale chemical language models for accurate and interpretable prediction of reactivity and toxicity","volume":"204","author":"Huang","year":"2025","journal-title":"Process Safety and Environmental Protection"},{"key":"10.1016\/j.neunet.2026.108627_bib0030","doi-asserted-by":"crossref","first-page":"1","DOI":"10.1016\/j.cviu.2016.10.018","article-title":"The thumos challenge on action recognition for videos \u201din the wild\u201d","volume":"155","author":"Idrees","year":"2017","journal-title":"Computer Vision and Image Understanding"},{"key":"10.1016\/j.neunet.2026.108627_bib0031","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"1637","article-title":"A hybrid attention mechanism for weakly-supervised temporal action localization","volume":"vol. 35","author":"Islam","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0032","unstructured":"Islam, A., Long, C., & Radke, R. J. (2021b). A hybrid attention mechanism for weakly-supervised temporal action localization. ArXiv, abs\/2101.00545. https:\/\/api.semanticscholar.org\/CorpusID:230435678."},{"key":"10.1016\/j.neunet.2026.108627_bib0033","doi-asserted-by":"crossref","first-page":"6688","DOI":"10.1109\/TMM.2022.3213478","article-title":"Adaptive mutual supervision for weakly-supervised temporal action localization","volume":"25","author":"Ju","year":"2022","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2026.108627_bib0034","unstructured":"Kay, W., Carreira, J., Simonyan, K., Zhang, B., Hillier, C., Vijayanarasimhan, S., Viola, F., Green, T., Back, T., Natsev, P., Suleyman, M., & Zisserman, A. (2017). The kinetics human action video dataset. https:\/\/arxiv.org\/abs\/1705.06950."},{"issue":"5","key":"10.1016\/j.neunet.2026.108627_bib0035","doi-asserted-by":"crossref","first-page":"315","DOI":"10.1073\/pnas.17.5.315","article-title":"Hamiltonian systems and transformation in hilbert space","volume":"17","author":"Koopman","year":"1931","journal-title":"Proceedings of the National Academy of Sciences"},{"key":"10.1016\/j.neunet.2026.108627_bib0036","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"3524","article-title":"Hide-and-seek: Forcing a network to be meticulous for weakly-supervised object and action localization","author":"Kumar Singh","year":"2017"},{"key":"10.1016\/j.neunet.2026.108627_bib0037","series-title":"2021\u202fIEEE\/CVF International conference on computer vision (ICCV)","first-page":"13628","article-title":"Learning action completeness from points for weakly-supervised temporal action localization","author":"Lee","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0038","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"11320","article-title":"Background suppression network for weakly-supervised temporal action localization","volume":"vol. 34","author":"Lee","year":"2020"},{"key":"10.1016\/j.neunet.2026.108627_bib0039","doi-asserted-by":"crossref","first-page":"1854","DOI":"10.1609\/aaai.v35i3.16280","article-title":"Weakly-supervised temporal action localization by uncertainty modeling","volume":"35","author":"Lee","year":"2021","journal-title":"Proceedings of the AAAI Conference on Artificial Intelligence"},{"key":"10.1016\/j.neunet.2026.108627_bib0040","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"19914","article-title":"Exploring denoised cross-video contrast for weakly-supervised temporal action localization","author":"Li","year":"2022"},{"key":"10.1016\/j.neunet.2026.108627_bib0041","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.106307","article-title":"Weakly supervised temporal action localization with actionness-guided false positive suppression","volume":"175","author":"Li","year":"2024","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108627_bib0042","doi-asserted-by":"crossref","first-page":"3319","DOI":"10.1109\/CVPR46437.2021.00333","article-title":"Learning salient boundary feature for anchor-free temporal action localization","author":"Lin","year":"2021","journal-title":"2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"10.1016\/j.neunet.2026.108627_bib0043","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"3889","article-title":"Bmn: Boundary-matching network for temporal action proposal generation","author":"Lin","year":"2019"},{"key":"10.1016\/j.neunet.2026.108627_bib0044","series-title":"Computer vision \u2013 ECCV 2018","first-page":"3","article-title":"Bsn: Boundary sensitive network for temporal action proposal generation","author":"Lin","year":"2018"},{"key":"10.1016\/j.neunet.2026.108627_bib0045","series-title":"Proceedings of the European conference on computer vision (ECCV)","first-page":"3","article-title":"Bsn: Boundary sensitive network for temporal action proposal generation","author":"Lin","year":"2018"},{"key":"10.1016\/j.neunet.2026.108627_bib0046","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"1298","article-title":"Completeness modeling and context separation for weakly supervised temporal action localization","author":"Liu","year":"2019"},{"key":"10.1016\/j.neunet.2026.108627_bib0047","doi-asserted-by":"crossref","first-page":"5427","DOI":"10.1109\/TIP.2022.3195321","article-title":"End-to-end temporal action detection with transformer","volume":"31","author":"Liu","year":"2022","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.neunet.2026.108627_bib0048","series-title":"The blessings of unlabeled background in untrimmed videos","first-page":"6172","author":"Liu","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0049","first-page":"1","article-title":"Adaptive prototype learning for weakly-supervised temporal action localization","author":"Luo","year":"2024","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.neunet.2026.108627_bib0050","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"9969","article-title":"Action unit memory network for weakly supervised temporal action localization","author":"Luo","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0051","series-title":"2021\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"9964","article-title":"Action unit memory network for weakly supervised temporal action localization","author":"Luo","year":"2021"},{"issue":"1","key":"10.1016\/j.neunet.2026.108627_bib0052","doi-asserted-by":"crossref","first-page":"4950","DOI":"10.1038\/s41467-018-07210-0","article-title":"Deep learning for universal linear embeddings of nonlinear dynamics","volume":"9","author":"Lusch","year":"2018","journal-title":"Nature Communications"},{"key":"10.1016\/j.neunet.2026.108627_bib0053","series-title":"Computer vision\u2013ECCV 2020: 16th european conference, glasgow, UK, august 23\u201328, 2020, proceedings, part IV 16","first-page":"420","article-title":"Sf-net: Single-frame supervision for temporal action localization","author":"Ma","year":"2020"},{"key":"10.1016\/j.neunet.2026.108627_bib0054","series-title":"Computer vision\u2013ECCV 2020: 16th european conference, glasgow, UK, august 23\u201328, 2020, proceedings, part XIV 16","first-page":"283","article-title":"Adversarial background-aware loss for weakly-supervised temporal activity localization","author":"Min","year":"2020"},{"issue":"11","key":"10.1016\/j.neunet.2026.108627_bib0055","doi-asserted-by":"crossref","first-page":"6939","DOI":"10.1109\/TCSVT.2023.3272891","article-title":"Collaborative foreground, background, and action modeling network for weakly supervised temporal action localization","volume":"33","author":"Moniruzzaman","year":"2023","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.108627_bib0056","doi-asserted-by":"crossref","first-page":"270","DOI":"10.1109\/TMM.2023.3263965","article-title":"Feature weakening, contextualization, and discrimination for weakly supervised temporal action localization","volume":"26","author":"Moniruzzaman","year":"2024","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2026.108627_bib0057","first-page":"13588","article-title":"D2-Net: Weakly-supervised action localization via discriminative embeddings and denoised activations","author":"Narayan","year":"2020","journal-title":"2021 IEEE\/CVF International Conference on Computer Vision (ICCV)"},{"key":"10.1016\/j.neunet.2026.108627_bib0058","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"13608","article-title":"D2-Net: Weakly-supervised action localization via discriminative embeddings and denoised activations","author":"Narayan","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0059","series-title":"2019\u202fIEEE\/CVF International conference on computer vision (ICCV)","first-page":"8678","article-title":"3C-net: Category count and center loss for weakly-supervised action localization","author":"Narayan","year":"2019"},{"key":"10.1016\/j.neunet.2026.108627_bib0060","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"6752","article-title":"Weakly supervised action localization by sparse temporal pooling network","author":"Nguyen","year":"2018"},{"key":"10.1016\/j.neunet.2026.108627_bib0061","first-page":"6752","article-title":"Weakly supervised action localization by sparse temporal pooling network","author":"Nguyen","year":"2017","journal-title":"2018 IEEE\/CVF Conference on Computer Vision and Pattern Recognition"},{"key":"10.1016\/j.neunet.2026.108627_bib0062","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"5502","article-title":"Weakly-supervised action localization with background modeling","author":"Nguyen","year":"2019"},{"key":"10.1016\/j.neunet.2026.108627_bib0063","series-title":"European conference on computer vision","article-title":"W-TALC: Weakly-supervised temporal activity localization and classification","author":"Paul","year":"2018"},{"key":"10.1016\/j.neunet.2026.108627_bib0064","series-title":"Proceedings of the european conference on computer vision (ECCV)","first-page":"563","article-title":"W-Talc: Weakly-supervised temporal activity localization and classification","author":"Paul","year":"2018"},{"key":"10.1016\/j.neunet.2026.108627_bib0065","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2024.106560","article-title":"Robust visual question answering via polarity enhancement and contrast","volume":"179","author":"Peng","year":"2024","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108627_bib0066","unstructured":"Qu, S., Chen, G., Li, Z., Zhang, L., Lu, F., & Knoll, A. (2021). Acm-net: Action context modeling network for weakly-supervised temporal action localization. arXiv preprint arXiv: 2104.02967."},{"key":"10.1016\/j.neunet.2026.108627_bib0067","series-title":"2023\u202fIEEE\/CVF Conference on computer vision and pattern recognition (CVPR)","first-page":"2394","article-title":"Proposal-based multiple instance learning for weakly-supervised temporal action localization","author":"Ren","year":"2023"},{"key":"10.1016\/j.neunet.2026.108627_bib0068","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"22992","article-title":"Pivotal: Prior-driven supervision for weakly-supervised temporal action localization","author":"Rizve","year":"2023"},{"key":"10.1016\/j.neunet.2026.108627_bib0069","doi-asserted-by":"crossref","first-page":"6717","DOI":"10.1109\/TMM.2024.3355628","article-title":"Snippet-to-prototype contrastive consensus network for weakly supervised temporal action localization","volume":"26","author":"Shao","year":"2024","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2026.108627_bib0070","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"1009","article-title":"Weakly-supervised action localization by generative attention modeling","author":"Shi","year":"2020"},{"key":"10.1016\/j.neunet.2026.108627_bib0071","doi-asserted-by":"crossref","first-page":"1379","DOI":"10.1109\/TIP.2023.3244411","article-title":"Stochasticformer: Stochastic modeling for weakly supervised temporal action localization","volume":"32","author":"Shi","year":"2023","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.neunet.2026.108627_bib0072","series-title":"Proceedings of the european conference on computer vision (ECCV)","first-page":"154","article-title":"Autoloc: Weakly-supervised temporal action localization in untrimmed videos","author":"Shou","year":"2018"},{"issue":"1","key":"10.1016\/j.neunet.2026.108627_bib0073","doi-asserted-by":"crossref","first-page":"354","DOI":"10.1109\/TCSVT.2022.3201540","article-title":"Slow motion matters: A slow motion enhanced network for weakly supervised temporal action localization","volume":"33","author":"Sun","year":"2023","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.108627_bib0074","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"4489","article-title":"Learning spatiotemporal features with 3d convolutional networks","author":"Tran","year":"2015"},{"key":"10.1016\/j.neunet.2026.108627_bib0075","series-title":"2017\u202fIEEE Conference on computer vision and pattern recognition (CVPR)","first-page":"6402","article-title":"Untrimmednets for weakly supervised action recognition and detection","author":"Wang","year":"2017"},{"key":"10.1016\/j.neunet.2026.108627_bib0076","series-title":"Proceedings of the IEEE conference on computer vision and pattern recognition","first-page":"4325","article-title":"Untrimmednets for weakly supervised action recognition and detection","author":"Wang","year":"2017"},{"key":"10.1016\/j.neunet.2026.108627_bib0077","series-title":"European conference on computer vision","first-page":"20","article-title":"Temporal segment networks: Towards good practices for deep action recognition","author":"Wang","year":"2016"},{"key":"10.1016\/j.neunet.2026.108627_bib0078","doi-asserted-by":"crossref","first-page":"272","DOI":"10.1016\/j.neunet.2023.08.057","article-title":"A multi-scale self-supervised hypergraph contrastive learning framework for video question answering","volume":"168","author":"Wang","year":"2023","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108627_bib0079","doi-asserted-by":"crossref","first-page":"9425","DOI":"10.1109\/TMM.2023.3252176","article-title":"Exploring action centers for temporal action localization","volume":"25","author":"Xia","year":"2023","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2026.108627_bib0080","article-title":"R-C3d: Region convolutional 3d network for temporal activity detection","author":"Xu","year":"2017","journal-title":"IEEE Computer Society"},{"key":"10.1016\/j.neunet.2026.108627_bib0081","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"5783","article-title":"R-C3d: Region convolutional 3d network for temporal activity detection","author":"Xu","year":"2017"},{"key":"10.1016\/j.neunet.2026.108627_bib0082","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"10156","article-title":"G-Tad: Sub-graph localization for temporal action detection","author":"Xu","year":"2020"},{"key":"10.1016\/j.neunet.2026.108627_bib0083","series-title":"Proceedings of the thirty-third AAAI conference on artificial intelligence and thirty-first innovative applications of artificial intelligence conference and ninth AAAI symposium on educational advances in artificial intelligence","article-title":"Segregated temporal assembly recurrent networks for weakly supervised multiple action detection","author":"Xu","year":"2019"},{"issue":"10","key":"10.1016\/j.neunet.2026.108627_bib0084","doi-asserted-by":"crossref","first-page":"11458","DOI":"10.1109\/TPAMI.2023.3284853","article-title":"Bilateral relation distillation for weakly supervised temporal action localization","volume":"45","author":"Xu","year":"2023","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"12","key":"10.1016\/j.neunet.2026.108627_bib0085","doi-asserted-by":"crossref","first-page":"9814","DOI":"10.1109\/TPAMI.2021.3132058","article-title":"Background-click supervision for temporal action localization","volume":"44","author":"Yang","year":"2021","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"key":"10.1016\/j.neunet.2026.108627_bib0086","doi-asserted-by":"crossref","first-page":"5848","DOI":"10.1109\/TIP.2021.3089361","article-title":"Multi-scale structure-aware network for weakly supervised temporal action detection","volume":"30","author":"Yang","year":"2021","journal-title":"IEEE Transactions on Image Processing"},{"key":"10.1016\/j.neunet.2026.108627_bib0087","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"53","article-title":"Uncertainty guided collaborative training for weakly supervised temporal action detection","author":"Yang","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0088","doi-asserted-by":"crossref","first-page":"53","DOI":"10.1109\/CVPR46437.2021.00012","article-title":"Uncertainty guided collaborative training for weakly supervised temporal action detection","author":"Yang","year":"2021","journal-title":"2021 IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"},{"key":"10.1016\/j.neunet.2026.108627_bib0089","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"3090","article-title":"Acgnet: Action complement graph network for weakly-supervised temporal action localization","volume":"vol. 36","author":"Yang","year":"2022"},{"key":"10.1016\/j.neunet.2026.108627_bib0090","doi-asserted-by":"crossref","DOI":"10.1016\/j.neunet.2025.107829","article-title":"Stealthy and efficient adversarial example attack on video retrieval systems","volume":"191","author":"Yao","year":"2025","journal-title":"Neural Networks"},{"key":"10.1016\/j.neunet.2026.108627_bib0091","unstructured":"Yuan, Y., Lyu, Y., Shen, X., Tsang, I. W., & Yeung, D.-Y. (2019). Marginalized average attentional network for weakly-supervised learning. arXiv preprint arXiv: 1905.08586."},{"key":"10.1016\/j.neunet.2026.108627_bib0092","doi-asserted-by":"crossref","DOI":"10.1016\/j.knosys.2024.112076","article-title":"Video and audio are images: A cross-modal mixer for original data on video\u2013audio retrieval","volume":"299","author":"Yuan","year":"2024","journal-title":"Knowledge-Based Systems"},{"key":"10.1016\/j.neunet.2026.108627_bib0093","series-title":"Proceedings of the AAAI conference on artificial intelligence","first-page":"6908","article-title":"Weakly-supervised temporal action localization by inferring salient snippet-feature","volume":"vol. 38","author":"Yun","year":"2024"},{"key":"10.1016\/j.neunet.2026.108627_bib0094","series-title":"Proceedings of the IEEE\/CVF international conference on computer vision","first-page":"7094","article-title":"Graph convolutional networks for temporal action localization","author":"Zeng","year":"2019"},{"issue":"10","key":"10.1016\/j.neunet.2026.108627_bib0095","doi-asserted-by":"crossref","first-page":"6209","DOI":"10.1109\/TPAMI.2021.3090167","article-title":"Graph convolutional module for temporal action localization in videos","volume":"44","author":"Zeng","year":"2021","journal-title":"IEEE Transactions on Pattern Analysis and Machine Intelligence"},{"issue":"10","key":"10.1016\/j.neunet.2026.108627_bib0096","doi-asserted-by":"crossref","first-page":"6209","DOI":"10.1109\/TPAMI.2021.3090167","article-title":"Graph convolutional module for temporal action localization in videos","volume":"44","author":"Zeng","year":"2022","journal-title":"IEEE Transactions on Pattern Analysis &amp; Machine Intelligence"},{"key":"10.1016\/j.neunet.2026.108627_bib0097","series-title":"Computer vision\u2013ECCV 2020: 16th european conference, glasgow, UK, august 23\u201328, 2020, proceedings, part VI 16","first-page":"37","article-title":"Two-stream consensus network for weakly-supervised temporal action localization","author":"Zhai","year":"2020"},{"key":"10.1016\/j.neunet.2026.108627_bib0098","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"16010","article-title":"Cola: Weakly-supervised temporal action localization with snippet contrastive learning","author":"Zhang","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0099","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"9095","article-title":"Cross-view gait recognition with deep universal linear embeddings","author":"Zhang","year":"2021"},{"key":"10.1016\/j.neunet.2026.108627_bib0100","doi-asserted-by":"crossref","first-page":"4568","DOI":"10.1109\/TCSVT.2023.3341881","article-title":"Cross-video contextual knowledge exploration and exploitation for ambiguity reduction in weakly supervised temporal action localization","volume":"34","author":"Zhang","year":"2023","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.108627_bib0101","doi-asserted-by":"crossref","first-page":"8476","DOI":"10.1109\/TMM.2024.3379887","article-title":"Integration of global and local knowledge for foreground enhancing in weakly supervised temporal action localization","volume":"26","author":"Zhang","year":"2024","journal-title":"IEEE Transactions on Multimedia"},{"issue":"4","key":"10.1016\/j.neunet.2026.108627_bib0102","doi-asserted-by":"crossref","first-page":"1852","DOI":"10.1109\/TNNLS.2019.2962815","article-title":"Adapnet: Adaptability decomposing encoder-decoder network for weakly supervised action recognition and localization","volume":"34","author":"Zhang","year":"2023","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"10.1016\/j.neunet.2026.108627_bib0103","first-page":"1","article-title":"DiffusionVMR: Diffusion model for joint video moment retrieval and highlight detection","author":"Zhao","year":"2024","journal-title":"IEEE Transactions on Neural Networks and Learning Systems"},{"key":"10.1016\/j.neunet.2026.108627_bib0104","series-title":"Computer vision\u2013ECCV 2020: 16th european conference, glasgow, UK, august 23\u201328, 2020, proceedings, part VIII 16","first-page":"539","article-title":"Bottom-up temporal action localization with mutual regularization","author":"Zhao","year":"2020"},{"key":"10.1016\/j.neunet.2026.108627_bib0105","series-title":"Proceedings of the IEEE international conference on computer vision","first-page":"2914","article-title":"Temporal action detection with structured segment networks","author":"Zhao","year":"2017"},{"key":"10.1016\/j.neunet.2026.108627_bib0106","doi-asserted-by":"crossref","first-page":"8253","DOI":"10.1109\/TMM.2023.3234362","article-title":"A novel action saliency and context-aware network for weakly-supervised temporal action localization","volume":"25","author":"Zhao","year":"2023","journal-title":"IEEE Transactions on Multimedia"},{"key":"10.1016\/j.neunet.2026.108627_bib0107","doi-asserted-by":"crossref","first-page":"7202","DOI":"10.1109\/TCSVT.2024.3374870","article-title":"A snippets relation and hard-snippets mask network for weakly-supervised temporal action localization","volume":"34","author":"Zhao","year":"2024","journal-title":"IEEE Transactions on Circuits and Systems for Video Technology"},{"key":"10.1016\/j.neunet.2026.108627_bib0108","series-title":"Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition","first-page":"23003","article-title":"Improving weakly supervised temporal action localization by bridging train-test gap in pseudo labels","author":"Zhou","year":"2023"}],"container-title":["Neural Networks"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026000894?httpAccept=text\/xml","content-type":"text\/xml","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/api.elsevier.com\/content\/article\/PII:S0893608026000894?httpAccept=text\/plain","content-type":"text\/plain","content-version":"vor","intended-application":"text-mining"}],"deposited":{"date-parts":[[2026,3,4]],"date-time":"2026-03-04T12:34:34Z","timestamp":1772627674000},"score":1,"resource":{"primary":{"URL":"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0893608026000894"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026,6]]},"references-count":108,"alternative-id":["S0893608026000894"],"URL":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108627","relation":{},"ISSN":["0893-6080"],"issn-type":[{"value":"0893-6080","type":"print"}],"subject":[],"published":{"date-parts":[[2026,6]]},"assertion":[{"value":"Elsevier","name":"publisher","label":"This article is maintained by"},{"value":"Cross-category spatiotemporal consensus and discriminative networks for weakly-supervised temporal action localization","name":"articletitle","label":"Article Title"},{"value":"Neural Networks","name":"journaltitle","label":"Journal Title"},{"value":"https:\/\/doi.org\/10.1016\/j.neunet.2026.108627","name":"articlelink","label":"CrossRef DOI link to publisher maintained version"},{"value":"article","name":"content_type","label":"Content Type"},{"value":"\u00a9 2026 Elsevier Ltd. All rights are reserved, including those for text and data mining, AI training, and similar technologies.","name":"copyright","label":"Copyright"}],"article-number":"108627"}}