{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,1,13]],"date-time":"2026-01-13T02:33:38Z","timestamp":1768271618165,"version":"3.49.0"},"publisher-location":"Singapore","reference-count":43,"publisher":"Springer Nature Singapore","isbn-type":[{"value":"9789819556755","type":"print"},{"value":"9789819556762","type":"electronic"}],"license":[{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2026,1,1]],"date-time":"2026-01-01T00:00:00Z","timestamp":1767225600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2026]]},"DOI":"10.1007\/978-981-95-5676-2_17","type":"book-chapter","created":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T20:32:22Z","timestamp":1768249942000},"page":"248-261","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":0,"title":["PACTFormer: Peak-Aware Cross-Temporal Transformer for\u00a0Temporal Action Detection"],"prefix":"10.1007","author":[{"given":"Zhewen","family":"Zhou","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Bing","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Qing","family":"Guo","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Chunlei","family":"Li","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Guangshuai","family":"Gao","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2026,1,13]]},"reference":[{"key":"17_CR1","doi-asserted-by":"crossref","unstructured":"An, X., Zhao, P., Wang, G., Zhao, C., Yang, S.: Transformer feature collapse of temporal action detection via multi-granularity semantic enhancement. Neurocomputing, 129543 (2025)","DOI":"10.1016\/j.neucom.2025.129543"},{"key":"17_CR2","doi-asserted-by":"crossref","unstructured":"Caba\u00a0Heilbron, F., Escorcia, V., Ghanem, B., Carlos\u00a0Niebles, J.: Activitynet: A large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2015)","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"17_CR3","doi-asserted-by":"crossref","unstructured":"Carion, N., Massa, F., Synnaeve, G., Usunier, N., Kirillov, A., Zagoruyko, S.: End-to-end object detection with transformers. In: Proceedings of the European Conference on Computer Vision (ECCV) (2020)","DOI":"10.1007\/978-3-030-58452-8_13"},{"key":"17_CR4","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"17_CR5","doi-asserted-by":"crossref","unstructured":"Cheng, F., Bertasius, G.: Tallformer: Temporal action localization with a long-memory transformer. In: Proceedings of the European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-19830-4_29"},{"key":"17_CR6","unstructured":"Dao, T., Fu, D., Ermon, S., Rudra, A., R\u00e9, C.: Flashattention: Fast and memory-efficient exact attention with io-awareness. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"17_CR7","unstructured":"Dong, Y., Cordonnier, J.B., Loukas, A.: Attention is not all you need: Pure attention loses rank doubly exponentially with depth. In: Proceedings of the International Conference on Machine Learning (ICML) (2021)"},{"key":"17_CR8","unstructured":"Dosovitskiy, A., et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. In: Proceedings of the International Conference on Learning Representations(ICLR) (2021)"},{"key":"17_CR9","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"17_CR10","unstructured":"Jiang, Y.G., et al.: THUMOS challenge: Action recognition with a large number of classes (2014). http:\/\/crcv.ucf.edu\/THUMOS14\/"},{"key":"17_CR11","doi-asserted-by":"publisher","first-page":"226","DOI":"10.1016\/j.engappai.2017.10.001","volume":"67","author":"KE Ko","year":"2018","unstructured":"Ko, K.E., Sim, K.B.: Deep convolutional framework for abnormal behavior detection in a smart surveillance system. Eng. Appl. Artif. Intell. 67, 226\u2013234 (2018)","journal-title":"Eng. Appl. Artif. Intell."},{"key":"17_CR12","doi-asserted-by":"crossref","unstructured":"Li, Q., Zu, G., Xu, H., Kong, J., Zhang, Y., Wang, J.: An adaptive dual selective transformer for temporal action localization. IEEE Trans. Multimedia (2024)","DOI":"10.1109\/TMM.2024.3367599"},{"key":"17_CR13","doi-asserted-by":"crossref","unstructured":"Lin, C., et al.: Learning salient boundary feature for anchor-free temporal action localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.00333"},{"key":"17_CR14","doi-asserted-by":"crossref","unstructured":"Lin, T., Liu, X., Li, X., Ding, E., Wen, S.: Bmn: Boundary-matching network for temporal action proposal generation. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00399"},{"key":"17_CR15","doi-asserted-by":"crossref","unstructured":"Lin, T., Zhao, X., Su, H., Wang, C., Yang, M.: Bsn: Boundary sensitive network for temporal action proposal generation. In: Proceedings of the European Conference on Computer Vision (ECCV) (2018)","DOI":"10.1007\/978-3-030-01225-0_1"},{"key":"17_CR16","doi-asserted-by":"crossref","unstructured":"Liu, Q., Wang, Z.: Progressive boundary refinement network for temporal action detection. In: Proceedings of the AAAI Conference on Artificial Intelligence (AAAI) (2020)","DOI":"10.1609\/aaai.v34i07.6829"},{"key":"17_CR17","doi-asserted-by":"crossref","unstructured":"Liu, S., Zhang, C.L., Zhao, C., Ghanem, B.: End-to-end temporal action detection with 1b parameters across 1000 frames. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2024)","DOI":"10.1109\/CVPR52733.2024.01759"},{"key":"17_CR18","doi-asserted-by":"crossref","unstructured":"Liu, X., Bai, S., Bai, X.: An empirical study of end-to-end temporal action detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2022)","DOI":"10.1109\/CVPR52688.2022.01938"},{"key":"17_CR19","doi-asserted-by":"publisher","first-page":"5427","DOI":"10.1109\/TIP.2022.3195321","volume":"31","author":"X Liu","year":"2022","unstructured":"Liu, X., et al.: End-to-end temporal action detection with transformer. IEEE Trans. Image Process. 31, 5427\u20135441 (2022)","journal-title":"IEEE Trans. Image Process."},{"issue":"8","key":"17_CR20","doi-asserted-by":"publisher","first-page":"2000","DOI":"10.1109\/TMM.2018.2794265","volume":"20","author":"A Tejero-de Pablos","year":"2018","unstructured":"Tejero-de Pablos, A., Nakashima, Y., Sato, T., Yokoya, N., Linna, M., Rahtu, E.: Summarization of user-generated sports video by using deep action recognition features. IEEE Trans. Multimedia 20(8), 2000\u20132011 (2018)","journal-title":"IEEE Trans. Multimedia"},{"key":"17_CR21","doi-asserted-by":"crossref","unstructured":"Qing, Z., et al.: Temporal context aggregation network for temporal action proposal refinement. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2021)","DOI":"10.1109\/CVPR46437.2021.00055"},{"key":"17_CR22","doi-asserted-by":"crossref","unstructured":"Sardari, F., Mustafa, A., Jackson, P.J., Hilton, A.: Pat: Position-aware transformer for dense multi-label action detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2023)","DOI":"10.1109\/ICCVW60793.2023.00321"},{"issue":"11","key":"17_CR23","doi-asserted-by":"publisher","first-page":"12922","DOI":"10.1109\/TPAMI.2023.3243465","volume":"45","author":"J Selva","year":"2023","unstructured":"Selva, J., Johansen, A.S., Escalera, S., Nasrollahi, K., Moeslund, T.B., Clap\u00e9s, A.: Video transformers: A survey. IEEE Trans. Pattern Anal. Mach. Intell. 45(11), 12922\u201312943 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"17_CR24","unstructured":"Shen, Z., Zhang, M., Zhao, H., Yi, S., Li, H.: Efficient attention: Attention with linear complexities. In: Proceedings of the IEEE\/CVF Winter Conference on Applications of Computer Vision (WACV) (2021)"},{"key":"17_CR25","doi-asserted-by":"crossref","unstructured":"Shi, D., Zhong, Y., Cao, Q., Ma, L., Li, J., Tao, D.: Tridet: Temporal action detection with relative boundary modeling. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2023)","DOI":"10.1109\/CVPR52729.2023.01808"},{"key":"17_CR26","doi-asserted-by":"crossref","unstructured":"Shi, D., et al.: React: Temporal action detection with relational queries. In: Proceedings of the European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-20080-9_7"},{"key":"17_CR27","doi-asserted-by":"crossref","unstructured":"Sigurdsson, G.A., Varol, G., Wang, X., Farhadi, A., Laptev, I., Gupta, A.: Hollywood in homes: Crowdsourcing data collection for activity understanding. In: Proceedings of the European Conference on Computer Vision (ECCV) (2016)","DOI":"10.1007\/978-3-319-46448-0_31"},{"key":"17_CR28","unstructured":"Tan, J., Zhao, X., Shi, X., Kang, B., Wang, L.: Pointtad: Multi-label temporal action detection with learnable query points. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"17_CR29","unstructured":"Tong, Z., Song, Y., Wang, J., Wang, L.: Videomae: Masked autoencoders are data-efficient learners for self-supervised video pre-training. In: Proceedings of the Advances in Neural Information Processing Systems (NeurIPS) (2022)"},{"key":"17_CR30","doi-asserted-by":"publisher","first-page":"14","DOI":"10.3389\/frobt.2017.00014","volume":"4","author":"A Vignolo","year":"2017","unstructured":"Vignolo, A., Noceti, N., Rea, F., Sciutti, A., Odone, F., Sandini, G.: Detecting biological motion for human-robot interaction: A link between perception and action. Front. Rob. AI 4, 14 (2017)","journal-title":"Front. Rob. AI"},{"key":"17_CR31","unstructured":"Wang, C., Cai, H., Zou, Y., Xiong, Y.: Rgb stream is enough for temporal action detection. arXiv preprint arXiv:2107.04362 (2021)"},{"key":"17_CR32","doi-asserted-by":"crossref","unstructured":"Wang, L., et al.: Videomae v2: Scaling video masked autoencoders with dual masking. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2023)","DOI":"10.1109\/CVPR52729.2023.01398"},{"key":"17_CR33","doi-asserted-by":"crossref","unstructured":"Wang, L., et al.: Temporal segment networks: Towards good practices for deep action recognition. In: Proceedings of the European Conference on Computer Vision (ECCV) (2016)","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"17_CR34","unstructured":"Wang, Y., et\u00a0al.: Internvideo: General video foundation models via generative and discriminative learning. arXiv preprint arXiv:2212.03191 (2022)"},{"key":"17_CR35","doi-asserted-by":"crossref","unstructured":"Xu, M., Zhao, C., Rojas, D.S., Thabet, A., Ghanem, B.: G-tad: Sub-graph localization for temporal action detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2020)","DOI":"10.1109\/CVPR42600.2020.01017"},{"key":"17_CR36","doi-asserted-by":"publisher","DOI":"10.1016\/j.cviu.2023.103692","volume":"232","author":"M Yang","year":"2023","unstructured":"Yang, M., Chen, G., Zheng, Y.D., Lu, T., Wang, L.: Basictad: an astounding RGB-only baseline for temporal action detection. Comput. Vis. Image Underst. 232, 103692 (2023)","journal-title":"Comput. Vis. Image Underst."},{"key":"17_CR37","doi-asserted-by":"crossref","unstructured":"Yang, M., Gao, H., Guo, P., Wang, L.: Adapting short-term transformers for action detection in untrimmed videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2024)","DOI":"10.1109\/CVPR52733.2024.01757"},{"key":"17_CR38","unstructured":"Yang, T., Zhu, Y., Xie, Y., Zhang, A., Chen, C., Li, M.: Aim: Adapting image models for efficient video understanding. In: Proceedings of the International Conference on Learning Representations(ICLR) (2023)"},{"key":"17_CR39","doi-asserted-by":"crossref","unstructured":"Zeng, R., et al.: Graph convolutional networks for temporal action localization. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision (ICCV) (2019)","DOI":"10.1109\/ICCV.2019.00719"},{"key":"17_CR40","doi-asserted-by":"crossref","unstructured":"Zhang, C.L., Wu, J., Li, Y.: Actionformer: Localizing moments of actions with transformers. In: Proceedings of the European Conference on Computer Vision (ECCV) (2022)","DOI":"10.1007\/978-3-031-19772-7_29"},{"issue":"10","key":"17_CR41","doi-asserted-by":"publisher","first-page":"3989","DOI":"10.1109\/TNNLS.2019.2951680","volume":"31","author":"B Zhao","year":"2019","unstructured":"Zhao, B., Li, X., Lu, X.: Property-constrained dual learning for video summarization. IEEE Trans. Neural Netw. Learn. Syst. 31(10), 3989\u20134000 (2019)","journal-title":"IEEE Trans. Neural Netw. Learn. Syst."},{"key":"17_CR42","doi-asserted-by":"crossref","unstructured":"Zhao, C., Liu, S., Mangalam, K., Ghanem, B.: Re2tal: Rewiring pretrained video backbones for reversible temporal action localization. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2023)","DOI":"10.1109\/CVPR52729.2023.01025"},{"key":"17_CR43","doi-asserted-by":"crossref","unstructured":"Zhu, Y., Zhang, G., Tan, J., Wu, G., Wang, L.: Dual DETRS for multi-label temporal action detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR) (2024)","DOI":"10.1109\/CVPR52733.2024.01756"}],"container-title":["Lecture Notes in Computer Science","Pattern Recognition and Computer Vision"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-981-95-5676-2_17","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,1,12]],"date-time":"2026-01-12T20:32:27Z","timestamp":1768249947000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-981-95-5676-2_17"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2026]]},"ISBN":["9789819556755","9789819556762"],"references-count":43,"URL":"https:\/\/doi.org\/10.1007\/978-981-95-5676-2_17","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"value":"0302-9743","type":"print"},{"value":"1611-3349","type":"electronic"}],"subject":[],"published":{"date-parts":[[2026]]},"assertion":[{"value":"13 January 2026","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}},{"value":"PRCV","order":1,"name":"conference_acronym","label":"Conference Acronym","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Chinese Conference on Pattern Recognition and Computer Vision  (PRCV)","order":2,"name":"conference_name","label":"Conference Name","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"Shanghai","order":3,"name":"conference_city","label":"Conference City","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"China","order":4,"name":"conference_country","label":"Conference Country","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"2025","order":5,"name":"conference_year","label":"Conference Year","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"15 October 2025","order":7,"name":"conference_start_date","label":"Conference Start Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"18 October 2025","order":8,"name":"conference_end_date","label":"Conference End Date","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"8","order":9,"name":"conference_number","label":"Conference Number","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"ccprcv2025","order":10,"name":"conference_id","label":"Conference ID","group":{"name":"ConferenceInfo","label":"Conference Information"}},{"value":"http:\/\/2025.prcv.cn\/index.asp","order":11,"name":"conference_url","label":"Conference URL","group":{"name":"ConferenceInfo","label":"Conference Information"}}]}}