{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,11,2]],"date-time":"2025-11-02T16:45:03Z","timestamp":1762101903534,"version":"build-2065373602"},"reference-count":56,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,4,10]],"date-time":"2024-04-10T00:00:00Z","timestamp":1712707200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,4,10]],"date-time":"2024-04-10T00:00:00Z","timestamp":1712707200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2024,6]]},"DOI":"10.1007\/s11554-024-01454-4","type":"journal-article","created":{"date-parts":[[2024,4,10]],"date-time":"2024-04-10T03:29:12Z","timestamp":1712719752000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["F2S-Net: learning frame-to-segment prediction for online action detection"],"prefix":"10.1007","volume":"21","author":[{"given":"Yi","family":"Liu","sequence":"first","affiliation":[]},{"given":"Yu","family":"Qiao","sequence":"additional","affiliation":[]},{"given":"Yali","family":"Wang","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,4,10]]},"reference":[{"key":"1454_CR1","doi-asserted-by":"crossref","unstructured":"An, J., Kang, H., Han, S.H., Yang, M.H., Kim, S.J.: Miniroad: Minimal rnn framework for online action detection. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 10341\u201310350 (2023)","DOI":"10.1109\/ICCV51070.2023.00949"},{"key":"1454_CR2","doi-asserted-by":"crossref","unstructured":"Caba\u00a0Heilbron, F., Escorcia, V., Ghanem, B., Carlos\u00a0Niebles, J.: Activitynet: A large-scale video benchmark for human activity understanding. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 961\u2013970 (2015)","DOI":"10.1109\/CVPR.2015.7298698"},{"key":"1454_CR3","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: CVPR (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"1454_CR4","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 6299\u20136308 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"1454_CR5","doi-asserted-by":"crossref","unstructured":"Chao, Y.W., Vijayanarasimhan, S., Seybold, B., Ross, D.A., Deng, J., Sukthankar, R.: Rethinking the faster r-cnn architecture for temporal action localization. In: CVPR (2018)","DOI":"10.1109\/CVPR.2018.00124"},{"key":"1454_CR6","doi-asserted-by":"crossref","unstructured":"Chen, J., Mittal, G., Yu, Y., Kong, Y., Chen, M.: Gatehub: Gated history unit with background suppression for online action detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 19925\u201319934 (2022)","DOI":"10.1109\/CVPR52688.2022.01930"},{"key":"1454_CR7","doi-asserted-by":"crossref","unstructured":"De\u00a0Geest, R., Gavves, E., Ghodrati, A., Li, Z., Snoek, C., Tuytelaars, T.: Online action detection. In: European Conference on Computer Vision, pp. 269\u2013284. Springer (2016)","DOI":"10.1007\/978-3-319-46454-1_17"},{"key":"1454_CR8","unstructured":"Du, T., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3d convolutional networks. In: ICCV (2015)"},{"key":"1454_CR9","unstructured":"Du, T., Wang, H., Torresani, L., Ray, J., Lecun, Y.: A closer look at spatiotemporal convolutions for action recognition. In: CVPR (2018)"},{"key":"1454_CR10","doi-asserted-by":"crossref","unstructured":"Eun, H., Moon, J., Park, J., Jung, C., Kim, C.: Learning to discriminate information for online action detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 809\u2013818 (2020)","DOI":"10.1109\/CVPR42600.2020.00089"},{"key":"1454_CR11","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2020.107695","volume":"111","author":"H Eun","year":"2021","unstructured":"Eun, H., Moon, J., Park, J., Jung, C., Kim, C.: Temporal filtering networks for online action detection. Pattern Recogn. 111, 107695 (2021)","journal-title":"Pattern Recogn."},{"key":"1454_CR12","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: Slowfast networks for video recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 6202\u20136211 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"1454_CR13","doi-asserted-by":"crossref","unstructured":"Gao, J., Chen, K., Nevatia, R.: Ctap: Complementary temporal action proposal generation. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 68\u201383 (2018)","DOI":"10.1007\/978-3-030-01216-8_5"},{"key":"1454_CR14","doi-asserted-by":"crossref","unstructured":"Gao, J., Yang, Z., Nevatia, R.: Red: Reinforced encoder-decoder networks for action anticipation. arXiv preprint arXiv:1707.04818 (2017)","DOI":"10.5244\/C.31.92"},{"key":"1454_CR15","doi-asserted-by":"crossref","unstructured":"Gao, M., Zhou, Y., Xu, R., Socher, R., Xiong, C.: Woad: Weakly supervised online action detection in untrimmed videos. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1915\u20131923 (2021)","DOI":"10.1109\/CVPR46437.2021.00195"},{"key":"1454_CR16","doi-asserted-by":"crossref","unstructured":"Girdhar, R., Carreira, J., Doersch, C., Zisserman, A.: Video action transformer network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 244\u2013253 (2019)","DOI":"10.1109\/CVPR.2019.00033"},{"key":"1454_CR17","doi-asserted-by":"crossref","unstructured":"Guo, H., Ren, Z., Wu, Y., Hua, G., Ji, Q.: Uncertainty-based spatial-temporal attention for online action detection. In: European Conference on Computer Vision, pp. 69\u201386. Springer (2022)","DOI":"10.1007\/978-3-031-19772-7_5"},{"key":"1454_CR18","doi-asserted-by":"publisher","first-page":"6017","DOI":"10.1109\/TIP.2020.2987425","volume":"29","author":"J Hou","year":"2020","unstructured":"Hou, J., Wu, X., Wang, R., Luo, J., Jia, Y.: Confidence-guided self refinement for action prediction in untrimmed videos. IEEE Trans. Image Process. 29, 6017\u20136031 (2020). https:\/\/doi.org\/10.1109\/TIP.2020.2987425","journal-title":"IEEE Trans. Image Process."},{"key":"1454_CR19","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2021.3078324","author":"L Huang","year":"2021","unstructured":"Huang, L., Huang, Y., Ouyang, W., Wang, L.: Modeling sub-actions for weakly supervised temporal action localization. IEEE Trans. Image Process. (2021). https:\/\/doi.org\/10.1109\/TIP.2021.3078324","journal-title":"IEEE Trans. Image Process."},{"key":"1454_CR20","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.cviu.2016.10.018","volume":"155","author":"H Idrees","year":"2017","unstructured":"Idrees, H., Zamir, A.R., Jiang, Y.G., Gorban, A., Laptev, I., Sukthankar, R., Shah, M.: The thumos challenge on action recognition for videos in the wild. Comput. Vis. Image Underst. 155, 1\u201323 (2017)","journal-title":"Comput. Vis. Image Underst."},{"key":"1454_CR21","doi-asserted-by":"crossref","unstructured":"Jain, M., Ghodrati, A., Snoek, C.G.: Actionbytes: Learning from trimmed videos to localize actions. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1171\u20131180 (2020)","DOI":"10.1109\/CVPR42600.2020.00125"},{"key":"1454_CR22","doi-asserted-by":"crossref","unstructured":"Kalogeiton, V., Weinzaepfel, P., Ferrari, V., Schmid, C.: Action tubelet detector for spatio-temporal action localization. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4405\u20134413 (2017)","DOI":"10.1109\/ICCV.2017.472"},{"key":"1454_CR23","doi-asserted-by":"publisher","DOI":"10.1016\/j.patcog.2021.107954","volume":"116","author":"YH Kim","year":"2021","unstructured":"Kim, Y.H., Nam, S., Kim, S.J.: Temporally smooth online action detection using cycle-consistent future anticipation. Pattern Recogn. 116, 107954 (2021)","journal-title":"Pattern Recogn."},{"key":"1454_CR24","doi-asserted-by":"crossref","unstructured":"Laptev, I., Marszalek, M., Schmid, C., Rozenfeld, B.: Learning realistic human actions from movies. In: CVPR (2008)","DOI":"10.1109\/CVPR.2008.4587756"},{"key":"1454_CR25","doi-asserted-by":"crossref","unstructured":"Lea, C., Vidal, R., Reiter, A., Hager, G.D.: Temporal convolutional networks: A unified approach to action segmentation. In: Computer vision\u2013ECCV 2016 Workshops: Amsterdam, The Netherlands, October 8\u201310 and 15\u201316, 2016, proceedings, Part III 14, pp. 47\u201354. Springer (2016)","DOI":"10.1007\/978-3-319-49409-8_7"},{"key":"1454_CR26","doi-asserted-by":"crossref","unstructured":"Li, Y., Lan, C., Xing, J., Zeng, W., Yuan, C., Liu, J.: Online human action detection using joint classification-regression recurrent neural networks. In: ECCV (2016)","DOI":"10.1007\/978-3-319-46478-7_13"},{"key":"1454_CR27","doi-asserted-by":"crossref","unstructured":"Lin, J., Gan, C., Han, S.: TSM: Temporal shift module for efficient video understanding. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 7083\u20137093 (2019)","DOI":"10.1109\/ICCV.2019.00718"},{"key":"1454_CR28","doi-asserted-by":"crossref","unstructured":"Lin, T., Zhao, X., Su, H., Wang, C., Yang, M.: BSN: boundary sensitive network for temporal action proposal generation. In: ECCV (2018)","DOI":"10.1007\/978-3-030-01225-0_1"},{"key":"1454_CR29","doi-asserted-by":"publisher","first-page":"6937","DOI":"10.1109\/TIP.2022.3217368","volume":"31","author":"Y Liu","year":"2022","unstructured":"Liu, Y., Wang, L., Wang, Y., Ma, X., Qiao, Y.: Fineaction: A fine-grained video dataset for temporal action localization. IEEE Trans. Image Process. 31, 6937\u20136950 (2022)","journal-title":"IEEE Trans. Image Process."},{"issue":"6","key":"1454_CR30","doi-asserted-by":"publisher","first-page":"122","DOI":"10.1007\/s11554-023-01374-9","volume":"20","author":"S Praveenkumar","year":"2023","unstructured":"Praveenkumar, S., Patil, P., Hiremath, P.: A novel algorithm for human action recognition in compressed domain using attention-guided approach. J. Real-Time Image Process. 20(6), 122 (2023)","journal-title":"J. Real-Time Image Process."},{"key":"1454_CR31","unstructured":"Qu, S., Chen, G., Xu, D., Dong, J., Lu, F., Knoll, A.: Lap-net: Adaptive features sampling via learning action progression for online action detection. arXiv preprint arXiv:2011.07915 (2020)"},{"key":"1454_CR32","doi-asserted-by":"crossref","unstructured":"Ramanishka, V., Chen, Y.T., Misu, T., Saenko, K.: Toward driving scene understanding: A dataset for learning driver behavior and causal reasoning. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 7699\u20137707 (2018)","DOI":"10.1109\/CVPR.2018.00803"},{"key":"1454_CR33","unstructured":"Ren, S., He, K., Girshick, R., Sun, J.: Faster r-cnn: Towards real-time object detection with region proposal networks. In: Advances in Neural Information Processing Systems, pp. 91\u201399 (2015)"},{"key":"1454_CR34","doi-asserted-by":"crossref","unstructured":"Shou, Z., Chan, J., Zareian, A., Miyazawa, K., Chang, S.F.: CDC: convolutional-de-convolutional networks for precise temporal action localization in untrimmed videos. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 5734\u20135743 (2017)","DOI":"10.1109\/CVPR.2017.155"},{"key":"1454_CR35","doi-asserted-by":"crossref","unstructured":"Shou, Z., Gao, H., Zhang, L., Miyazawa, K., Chang, S.F.: Autoloc: weakly-supervised temporal action localization in untrimmed videos. In: Proceedings of the European Conference on Computer Vision (ECCV), pp. 154\u2013171 (2018)","DOI":"10.1007\/978-3-030-01270-0_10"},{"key":"1454_CR36","doi-asserted-by":"crossref","unstructured":"Shou, Z., Wang, D., Chang, S.F.: Temporal action localization in untrimmed videos via multi-stage cnns. In: CVPR (2016)","DOI":"10.1109\/CVPR.2016.119"},{"key":"1454_CR37","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: NIPS (2014)"},{"key":"1454_CR38","doi-asserted-by":"publisher","first-page":"2103","DOI":"10.1109\/TIP.2020.3044218","volume":"30","author":"R Su","year":"2021","unstructured":"Su, R., Xu, D., Sheng, L., Ouyang, W.: PSG-TAL: Progressive cross-granularity cooperation for temporal action localization. IEEE Trans. Image Process. 30, 2103\u20132113 (2021). https:\/\/doi.org\/10.1109\/TIP.2020.3044218","journal-title":"IEEE Trans. Image Process."},{"key":"1454_CR39","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Adv. Neural Inf. Process. Syst. 30 (2017)"},{"issue":"1","key":"1454_CR40","doi-asserted-by":"publisher","first-page":"60","DOI":"10.1007\/s11263-012-0594-8","volume":"103","author":"H Wang","year":"2013","unstructured":"Wang, H., Kl\u00e4ser, A., Schmid, C., Liu, C.L.: Dense trajectories and motion boundary descriptors for action recognition. Int. J. Comput. Vis. 103(1), 60\u201379 (2013)","journal-title":"Int. J. Comput. Vis."},{"key":"1454_CR41","doi-asserted-by":"crossref","unstructured":"Wang, L., Xiong, Y., Lin, D., Van\u00a0Gool, L.: Untrimmednets for weakly supervised action recognition and detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp. 4325\u20134334 (2017)","DOI":"10.1109\/CVPR.2017.678"},{"key":"1454_CR42","unstructured":"Wang, L., Xiong, Y., Wang, Z., Qiao, Y.: Towards good practices for very deep two-stream convnets. arXiv preprint arXiv:1507.02159 (2015)"},{"key":"1454_CR43","doi-asserted-by":"crossref","unstructured":"Wang, L., Xiong, Y., Wang, Z., Qiao, Y., Lin, D., Tang, X., Van\u00a0Gool, L.: Temporal segment networks: Towards good practices for deep action recognition. In: European Conference on Computer Vision, pp. 20\u201336. Springer (2016)","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"1454_CR44","doi-asserted-by":"crossref","unstructured":"Wang, X., Zhang, S., Qing, Z., Shao, Y., Zuo, Z., Gao, C., Sang, N.: Oadtr: Online action detection with transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp. 7565\u20137575 (2021)","DOI":"10.1109\/ICCV48922.2021.00747"},{"issue":"1","key":"1454_CR45","doi-asserted-by":"publisher","first-page":"15","DOI":"10.1007\/s11554-023-01277-9","volume":"20","author":"X Wei","year":"2023","unstructured":"Wei, X., Yao, S., Zhao, C., Hu, D., Luo, H., Lu, Y.: Lightweight multimodal feature graph convolutional network for dangerous driving behavior detection. J. Real-Time Image Process. 20(1), 15 (2023)","journal-title":"J. Real-Time Image Process."},{"key":"1454_CR46","unstructured":"Xiong, Y., Wang, L., Wang, Z., Zhang, B., Song, H., Li, W., Lin, D., Qiao, Y., Van\u00a0Gool, L., Tang, X.: Cuhk & ethz & siat submission to activitynet challenge 2016. arXiv preprint arXiv:1608.00797 (2016)"},{"key":"1454_CR47","doi-asserted-by":"crossref","unstructured":"Xu, H., Das, A., Saenko, K.: R-c3d: Region convolutional 3d network for temporal activity detection. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.617"},{"key":"1454_CR48","doi-asserted-by":"crossref","unstructured":"Xu, M., Gao, M., Chen, Y.T., Davis, L.S., Crandall, D.J.: Temporal recurrent networks for online action detection. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 5532\u20135541 (2019)","DOI":"10.1109\/ICCV.2019.00563"},{"key":"1454_CR49","first-page":"1086","volume":"34","author":"M Xu","year":"2021","unstructured":"Xu, M., Xiong, Y., Chen, H., Li, X., Xia, W., Tu, Z., Soatto, S.: Long short-term transformer for online action detection. Adv. Neural Inf. Process. Syst. 34, 1086\u20131099 (2021)","journal-title":"Adv. Neural Inf. Process. Syst."},{"key":"1454_CR50","doi-asserted-by":"crossref","unstructured":"Yang, L., Han, J., Zhang, D.: Colar: Effective and efficient online action detection by consulting exemplars. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3160\u20133169 (2022)","DOI":"10.1109\/CVPR52688.2022.00316"},{"key":"1454_CR51","doi-asserted-by":"publisher","first-page":"8535","DOI":"10.1109\/TIP.2020.3016486","volume":"29","author":"L Yang","year":"2020","unstructured":"Yang, L., Peng, H., Zhang, D., Fu, J., Han, J.: Revisiting anchor mechanisms for temporal action localization. IEEE Trans. Image Process. 29, 8535\u20138548 (2020). https:\/\/doi.org\/10.1109\/TIP.2020.3016486","journal-title":"IEEE Trans. Image Process."},{"issue":"12","key":"1454_CR52","doi-asserted-by":"publisher","first-page":"5797","DOI":"10.1109\/TIP.2019.2922108","volume":"28","author":"R Zeng","year":"2019","unstructured":"Zeng, R., Gan, C., Chen, P., Huang, W., Wu, Q., Tan, M.: Breaking winner-takes-all: Iterative-winners-out networks for weakly supervised temporal action localization. IEEE Trans. Image Process. 28(12), 5797\u20135808 (2019). https:\/\/doi.org\/10.1109\/TIP.2019.2922108","journal-title":"IEEE Trans. Image Process."},{"key":"1454_CR53","doi-asserted-by":"crossref","unstructured":"Zeng, R., Huang, W., Tan, M., Rong, Y., Zhao, P., Huang, J., Gan, C.: Graph convolutional networks for temporal action localization. In: ICCV (2019)","DOI":"10.1109\/ICCV.2019.00719"},{"issue":"6","key":"1454_CR54","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s11554-023-01377-6","volume":"20","author":"Y Zhang","year":"2023","unstructured":"Zhang, Y., Gan, J., Zhao, Z., Chen, J., Chen, X., Diao, Y., Tu, S.: A real-time fall detection model based on BlazePose and improved ST-GCN. J. Real-Time Image Process. 20(6), 1\u201312 (2023)","journal-title":"J. Real-Time Image Process."},{"key":"1454_CR55","unstructured":"Zhao, P., Xie, L., Zhang, Y., Wang, Y., Tian, Q.: Privileged knowledge distillation for online action detection. arXiv preprint arXiv:2011.09158 (2020)"},{"key":"1454_CR56","doi-asserted-by":"crossref","unstructured":"Zhao, Y., Xiong, Y., Wang, L., Wu, Z., Tang, X., Lin, D.: Temporal action detection with structured segment networks. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 2914\u20132923 (2017)","DOI":"10.1109\/ICCV.2017.317"}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-024-01454-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-024-01454-4\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-024-01454-4.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,6,6]],"date-time":"2024-06-06T08:35:33Z","timestamp":1717662933000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-024-01454-4"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,4,10]]},"references-count":56,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2024,6]]}},"alternative-id":["1454"],"URL":"https:\/\/doi.org\/10.1007\/s11554-024-01454-4","relation":{},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"type":"print","value":"1861-8200"},{"type":"electronic","value":"1861-8219"}],"subject":[],"published":{"date-parts":[[2024,4,10]]},"assertion":[{"value":"25 December 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"18 March 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"10 April 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare no competing interests.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}],"article-number":"73"}}