{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,26]],"date-time":"2026-02-26T15:33:02Z","timestamp":1772119982583,"version":"3.50.1"},"reference-count":61,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T00:00:00Z","timestamp":1724371200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T00:00:00Z","timestamp":1724371200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["J Real-Time Image Proc"],"published-print":{"date-parts":[[2024,10]]},"DOI":"10.1007\/s11554-024-01541-6","type":"journal-article","created":{"date-parts":[[2024,8,23]],"date-time":"2024-08-23T18:15:53Z","timestamp":1724436953000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Efficient spatio-temporal network for action recognition"],"prefix":"10.1007","volume":"21","author":[{"given":"Yanxiong","family":"Su","sequence":"first","affiliation":[]},{"given":"Qian","family":"Zhao","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,8,23]]},"reference":[{"key":"1541_CR1","unstructured":"Simonyan, K., Zisserman, A.: Two-stream convolutional networks for action recognition in videos. In: Advances in neural information processing systems, vol. 27 (2014). https:\/\/api.semanticscholar.org\/CorpusID:11797475"},{"key":"1541_CR2","doi-asserted-by":"crossref","unstructured":"Tran, D., Bourdev, L., Fergus, R., Torresani, L., Paluri, M.: Learning spatiotemporal features with 3d convolutional networks. In Proceedings of the IEEE International Conference on Computer Vision, pages 4489\u20134497, (2015)","DOI":"10.1109\/ICCV.2015.510"},{"key":"1541_CR3","doi-asserted-by":"crossref","unstructured":"Qiu, Z., Yao, T., Mei, T.: Learning spatio-temporal representation with pseudo-3d residual networks. In: Proceedings of the IEEE International Conference on Computer Vision, pages 5533\u20135541, (2017)","DOI":"10.1109\/ICCV.2017.590"},{"key":"1541_CR4","doi-asserted-by":"crossref","unstructured":"Xie, S., Sun, C., Huang, J., Tu Z., Murphy, K. Rethinking spatiotemporal feature learning: Speed-accuracy trade-offs in video classification. In: Proceedings of the European Conference on Computer Vision (ECCV), pages 305\u2013321, (2018)","DOI":"10.1007\/978-3-030-01267-0_19"},{"key":"1541_CR5","doi-asserted-by":"crossref","unstructured":"Tran, D., Wang, H., Torresani, L., Ray, J., LeCun, Y., Paluri, M.: A closer look at spatiotemporal convolutions for action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 6450\u20136459, (2018)","DOI":"10.1109\/CVPR.2018.00675"},{"key":"1541_CR6","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.. Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 770\u2013778, (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1541_CR7","doi-asserted-by":"crossref","unstructured":"Wang, L., Xiong, Y., Wang, Z., Qiao, Y., Lin, D., Tang, X., Van\u00a0Gool, L.: Temporal segment networks: Towards good practices for deep action recognition. In: European Conference on Computer Vision, pages 20\u201336. Springer, (2016)","DOI":"10.1007\/978-3-319-46484-8_2"},{"key":"1541_CR8","doi-asserted-by":"crossref","unstructured":"Zhou, B., Andonian, A., Oliva, A., Torralba, A.: Temporal relational reasoning in videos. In: Proceedings of the European Conference on Computer Vision (ECCV), pages 803\u2013818, (2018)","DOI":"10.1007\/978-3-030-01246-5_49"},{"key":"1541_CR9","doi-asserted-by":"crossref","unstructured":"Lin, J., Gan, C., Han, S.: Tsm: Temporal shift module for efficient video understanding. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 7083\u20137093, (2019)","DOI":"10.1109\/ICCV.2019.00718"},{"key":"1541_CR10","doi-asserted-by":"publisher","first-page":"5174","DOI":"10.1109\/TCSVT.2023.3250646","volume":"33","author":"Z Li","year":"2023","unstructured":"Li, Z., Li, J., Ma, Y., Wang, R., Shi, Z., Ding, Y., Liu, X.: Spatio-temporal adaptive network with bidirectional temporal difference for action recognition. IEEE Trans. Circuits Syst. Video Technol. 33, 5174\u20135185 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1541_CR11","doi-asserted-by":"crossref","unstructured":"Lee, M., Lee, S., Son, S., Park, G., Kwak, N.: Motion feature network: Fixed motion filter for action recognition. In: Proceedings of the European Conference on Computer Vision (ECCV), pages 387\u2013403, (2018)","DOI":"10.1007\/978-3-030-01249-6_24"},{"key":"1541_CR12","doi-asserted-by":"crossref","unstructured":"Li, Y., Ji, B., Shi, X., Zhang, J., Kang, B., Wang, L.: Tea: temporal excitation and aggregation for action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 909\u2013918, (2020)","DOI":"10.1109\/CVPR42600.2020.00099"},{"key":"1541_CR13","doi-asserted-by":"crossref","unstructured":"Liu, Z., Luo, D., Wang, Y., Wang, L., Tai, Y., Wang, C., Li, J., Huang, F., Tong, L.: Teinet: towards an efficient architecture for video recognition. In Proceedings of the AAAI Conference on Artificial Intelligence 34, 11669\u201311676 (2020)","DOI":"10.1609\/aaai.v34i07.6836"},{"key":"1541_CR14","doi-asserted-by":"crossref","unstructured":"Liu, Z., Wang, L., Wayne, W., Qian, C., Tong, L.: Tam: temporal adaptive module for video recognition. In Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 13708\u201313718 (2021)","DOI":"10.1109\/ICCV48922.2021.01345"},{"key":"1541_CR15","doi-asserted-by":"crossref","unstructured":"Shao, H., Qian, S., Liu, Y.: Temporal interlacing network. In Proceedings of the AAAI Conference on Artificial Intelligence 34, 11966\u201311973 (2020)","DOI":"10.1609\/aaai.v34i07.6872"},{"key":"1541_CR16","doi-asserted-by":"crossref","unstructured":"Hao, Y., Zhang, H., Ngo, C.W., He, X.: Group contextualization for video recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 928\u2013938, (2022)","DOI":"10.1109\/CVPR52688.2022.00100"},{"key":"1541_CR17","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, H.: Slowfast networks for video recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 6202\u20136211, (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"1541_CR18","doi-asserted-by":"publisher","DOI":"10.1016\/j.imavis.2023.104740","volume":"137","author":"S Li","year":"2023","unstructured":"Li, S., Wang, Z., Liu, Y., Zhang, Y., Zhu, J., Cui, X., Liu, J.: Fsformer: fast-slow transformer for video action recognition. Image Vis. Comput. 137, 104740 (2023)","journal-title":"Image Vis. Comput."},{"key":"1541_CR19","doi-asserted-by":"crossref","unstructured":"Yang, C., Xu, Y., Shi, J., Dai, B., Zhou, B.: Temporal pyramid network for action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 591\u2013600, (2020)","DOI":"10.1109\/CVPR42600.2020.00067"},{"issue":"3","key":"1541_CR20","doi-asserted-by":"publisher","first-page":"977","DOI":"10.1109\/TCSVT.2022.3207518","volume":"33","author":"X Sheng","year":"2022","unstructured":"Sheng, X., Li, K., Shen, Z., Xiao, G.: A progressive difference method for capturing visual tempos on action recognition. IEEE Trans. Circuits Syst. Video Technol. 33(3), 977\u2013987 (2022)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1541_CR21","doi-asserted-by":"publisher","first-page":"4104","DOI":"10.1109\/TIP.2022.3180585","volume":"31","author":"Y Liu","year":"2022","unstructured":"Liu, Y., Yuan, J., Zhigang, T.: Motion-driven visual tempo learning for video-based action recognition. IEEE Trans. Image Process. 31, 4104\u20134116 (2022)","journal-title":"IEEE Trans. Image Process."},{"key":"1541_CR22","doi-asserted-by":"crossref","unstructured":"Sudhakaran, S., Escalera, S., Lanz, O.: Gate-shift networks for video action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 1102\u20131111, (2020)","DOI":"10.1109\/CVPR42600.2020.00118"},{"key":"1541_CR23","doi-asserted-by":"crossref","unstructured":"Luo, C., Yuille, A.L.: Grouped spatial-temporal aggregation for efficient action recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 5512\u20135521, (2019)","DOI":"10.1109\/ICCV.2019.00561"},{"key":"1541_CR24","doi-asserted-by":"crossref","unstructured":"Jiang, B., Wang, M.M., Gan, W., Wu, W., Yan, J.: Stm: spatiotemporal and motion encoding for action recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 2000\u20132009, (2019)","DOI":"10.1109\/ICCV.2019.00209"},{"key":"1541_CR25","doi-asserted-by":"crossref","unstructured":"Wang, Z., She, Q., Smolic, A.: Action-net: multipath excitation for action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 13214\u201313223, (2021)","DOI":"10.1109\/CVPR46437.2021.01301"},{"key":"1541_CR26","doi-asserted-by":"crossref","unstructured":"Kwon, H., Kim, M., Kwak, S., Cho, M.: Motionsqueeze: neural motion feature learning for video understanding. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XVI 16, pages 345\u2013362. Springer, (2020)","DOI":"10.1007\/978-3-030-58517-4_21"},{"key":"1541_CR27","doi-asserted-by":"publisher","first-page":"3912","DOI":"10.1109\/TCSVT.2023.3235522","volume":"33","author":"Y Chen","year":"2023","unstructured":"Chen, Y., Ge, H., Liu, Y., Cai, X., Sun, L.: Agpn: action granularity pyramid network for video action recognition. IEEE Trans. Circuits Syst. Video Technol. 33, 3912\u20133923 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"1541_CR28","unstructured":"Fan, Q., Richard Chen, C.-F., Kuehne, H., Pistoia, M., Cox, D.: More is less: learning efficient video representations by big-little network and depthwise temporal aggregation. In: Advances in Neural Information Processing Systems, vol. 32 (2019). https:\/\/api.semanticscholar.org\/CorpusID:208134035"},{"key":"1541_CR29","doi-asserted-by":"crossref","unstructured":"Wang, L., Tong, Z., Ji, B., Wu, G.: Tdn: temporal difference networks for efficient action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 1895\u20131904, (2021)","DOI":"10.1109\/CVPR46437.2021.00193"},{"key":"1541_CR30","doi-asserted-by":"crossref","unstructured":"Gong, G., Zheng, L., Mu, Y.: Scale matters: temporal scale aggregation network for precise action localization in untrimmed videos. In: 2020 IEEE international Conference on Multimedia and Expo (ICME), pages 1\u20136. IEEE, (2020)","DOI":"10.1109\/ICME46284.2020.9102850"},{"key":"1541_CR31","doi-asserted-by":"publisher","DOI":"10.1016\/j.displa.2023.102569","volume":"80","author":"X Gao","year":"2023","unstructured":"Gao, X., Chang, Z., Li, Y., Ran, X., Ke, W., Yonggang, L.: Fine-gained motion enhancement for action recognition: Focusing on action-related regions. Displays 80, 102569 (2023)","journal-title":"Displays"},{"issue":"2","key":"1541_CR32","doi-asserted-by":"publisher","first-page":"87","DOI":"10.1007\/s11063-024-11547-7","volume":"56","author":"L Zhou","year":"2024","unstructured":"Zhou, L., Yuanyao, L., Jiang, H.: Fease: feature selection and enhancement networks for action recognition. Neural Process. Lett. 56(2), 87 (2024)","journal-title":"Neural Process. Lett."},{"key":"1541_CR33","doi-asserted-by":"crossref","unstructured":"Dai, R., Das, S., Kahatapitiya, K., Ryoo, M.S., Br\u00e9mond, F.: Ms-tct: multi-scale temporal convtransformer for action detection. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 20041\u201320051, (2022)","DOI":"10.1109\/CVPR52688.2022.01941"},{"key":"1541_CR34","doi-asserted-by":"crossref","unstructured":"Truong, T.-D., Bui, Q.-H., Duong, C.H., Seo, H.-S., Phung, S.L., Li, X., Luu, K.: Direcformer: A directed attention in transformer approach to robust action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 20030\u201320040, (2022)","DOI":"10.1109\/CVPR52688.2022.01940"},{"key":"1541_CR35","unstructured":"Dosovitskiy, A., Beyer, L., Kolesnikov, A., Weissenborn, D., Zhai, X., Unterthiner, T., Dehghani, M., Minderer, M., Heigold, G., Gelly, S., et\u00a0al.: An image is worth 16x16 words: Transformers for image recognition at scale. arXiv preprint arXiv:2010.11929, (2020)"},{"key":"1541_CR36","doi-asserted-by":"publisher","first-page":"218","DOI":"10.1109\/TMM.2023.3263288","volume":"26","author":"Z Qing","year":"2023","unstructured":"Qing, Z., Zhang, S., Huang, Z., Wang, X., Wang, Y., Lv, Y., Gao, C., Sang, N.: Mar: masked autoencoders for efficient action recognition. IEEE Trans. Multimed. 26, 218\u2013233 (2023)","journal-title":"IEEE Trans. Multimed."},{"key":"1541_CR37","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? a new model and the kinetics dataset. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 6299\u20136308, (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"1541_CR38","doi-asserted-by":"crossref","unstructured":"Li, X., Wang, Y., Zhou, Z., Qiao, Y.: Smallbignet: integrating core and contextual views for video classification. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 1092\u20131101, (2020)","DOI":"10.1109\/CVPR42600.2020.00117"},{"key":"1541_CR39","doi-asserted-by":"crossref","unstructured":"Zolfaghari, M., Singh, K., Brox, T.: Eco: efficient convolutional network for online video understanding. In: Proceedings of the European Conference on computer vision (ECCV), pages 695\u2013712, (2018)","DOI":"10.1007\/978-3-030-01216-8_43"},{"issue":"7","key":"1541_CR40","doi-asserted-by":"publisher","first-page":"10261","DOI":"10.1007\/s11042-021-11766-3","volume":"81","author":"Y Cao","year":"2022","unstructured":"Cao, Y., Tang, Q., Xiaobo, L.: Stcnet: spatiotemporal cross network for industrial smoke detection. Multimed. Tools Appl. 81(7), 10261\u201310277 (2022)","journal-title":"Multimed. Tools Appl."},{"key":"1541_CR41","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C.: X3d: expanding architectures for efficient video recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 203\u2013213, (2020)","DOI":"10.1109\/CVPR42600.2020.00028"},{"key":"1541_CR42","doi-asserted-by":"crossref","unstructured":"Zhou, Y., Sun, X., Zha, Z.-J., Zeng, W.: Mict: mixed 3d\/2d convolutional tube for human action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 449\u2013458, (2018)","DOI":"10.1109\/CVPR.2018.00054"},{"key":"1541_CR43","doi-asserted-by":"crossref","unstructured":"Li, C., Zhong, Q., Xie, D., Shiliang, P.: Collaborative spatiotemporal feature learning for video action recognition. In Proceedings of the ieee\/cvf Conference on Computer Vision and Pattern recognition, pages 7872\u20137881 (2019)","DOI":"10.1109\/CVPR.2019.00806"},{"key":"1541_CR44","unstructured":"Li, K., Li, X., Wang, Y., Wang, J., Qiao, Y.: Ct-net: Channel tensorization network for video classification. arXiv preprint arXiv:2106.01603, (2021)"},{"key":"1541_CR45","doi-asserted-by":"crossref","unstructured":"Li, X., Shuai, B., Tighe, J.. Directional temporal modeling for action recognition. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part VI 16, pages 275\u2013291. Springer, (2020)","DOI":"10.1007\/978-3-030-58539-6_17"},{"key":"1541_CR46","doi-asserted-by":"crossref","unstructured":"Chen, Y., Kalantidis, Y., Li, J., Yan, S., Feng, J.: Multi-fiber networks for video recognition. In: Proceedings of the European Conference on Computer Vision (ECCV), pages 352\u2013367, (2018)","DOI":"10.1007\/978-3-030-01246-5_22"},{"key":"1541_CR47","doi-asserted-by":"crossref","unstructured":"Hussein, N., Gavves, E., Smeulders, A.W.M.: Timeception for complex action recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 254\u2013263, (2019)","DOI":"10.1109\/CVPR.2019.00034"},{"key":"1541_CR48","doi-asserted-by":"crossref","unstructured":"Seong, H., Hyun, J., Kim, E.: Kernelized memory network for video object segmentation. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXII 16, pages 629\u2013645. Springer, (2020)","DOI":"10.1007\/978-3-030-58542-6_38"},{"key":"1541_CR49","doi-asserted-by":"crossref","unstructured":"Goyal, R., Kahou, S.E., Michalski, V., Materzynska, J., Westphal, S., Kim, H., Haenel, V., Fruend, I., Yianilos, P., Mueller-Freitag, M., et\u00a0al.: The \"something something\" video database for learning and evaluating visual common sense. In: Proceedings of the IEEE International Conference on Computer Vision, pages 5842\u20135850, (2017)","DOI":"10.1109\/ICCV.2017.622"},{"key":"1541_CR50","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., Garrote, E., Poggio, T., Serre, T.: Hmdb: a large video database for human motion recognition. In: 2011 International Conference on Computer Vision, pages 2556\u20132563. IEEE, (2011)","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"1541_CR51","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.J., Li, K., Fei-Fei, L.: Imagenet: a large-scale hierarchical image database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition, pages 248\u2013255. Ieee, (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1541_CR52","doi-asserted-by":"crossref","unstructured":"Zhu, X., Xu, C., Hui, L., Lu, C., Tao, D.: Approximated bilinear modules for temporal modeling. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pages 3494\u20133503, (2019)","DOI":"10.1109\/ICCV.2019.00359"},{"key":"1541_CR53","doi-asserted-by":"publisher","first-page":"10913","DOI":"10.1109\/TPAMI.2023.3268134","volume":"45","author":"S Sudhakaran","year":"2023","unstructured":"Sudhakaran, S., Escalera, S., Lanz, O.: Gate-shift-fuse for video action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 45, 10913\u201310928 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"issue":"10","key":"1541_CR54","doi-asserted-by":"publisher","first-page":"2453","DOI":"10.1007\/s11263-022-01661-1","volume":"130","author":"Y Tian","year":"2022","unstructured":"Tian, Y., Yan, Y., Zhai, G., Guo, G., Gao, Z.: Ean: event adaptive network for enhanced action recognition. Int. J. Comput. Vis. 130(10), 2453\u20132471 (2022)","journal-title":"Int. J. Comput. Vis."},{"key":"1541_CR55","doi-asserted-by":"crossref","unstructured":"Mingyu, W., Jiang, B., Luo, D., Yan, J., Wang, Y., Tai, Y., Wang, C., Li, J., Huang, F., Yang, X.: Learning comprehensive motion representation for action recognition. In Proceedings of the AAAI Conference on Artificial Intelligence 35, 2934\u20132942 (2021)","DOI":"10.1609\/aaai.v35i4.16400"},{"key":"1541_CR56","doi-asserted-by":"crossref","unstructured":"Wang, X., Gupta, A.: Videos as space-time region graphs. In: Proceedings of the European Conference on Computer Vision (ECCV), pages 399\u2013417, (2018)","DOI":"10.1007\/978-3-030-01228-1_25"},{"key":"1541_CR57","doi-asserted-by":"crossref","unstructured":"Wang, H., Tran, D., Torresani, L., Feiszli, M.: Video modeling with correlation networks. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 352\u2013361, (2020)","DOI":"10.1109\/CVPR42600.2020.00043"},{"key":"1541_CR58","doi-asserted-by":"crossref","unstructured":"Liu, X., Lee, J.-Y., Jin, H.: Learning video representations from correspondence proposals. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pages 4273\u20134281, (2019)","DOI":"10.1109\/CVPR.2019.00440"},{"issue":"18","key":"1541_CR59","doi-asserted-by":"publisher","first-page":"14593","DOI":"10.1007\/s00521-020-05144-7","volume":"32","author":"Z Liu","year":"2020","unstructured":"Liu, Z., Li, Z., Wang, R., Zong, M., Ji, W.: Spatiotemporal saliency-based multi-stream networks with attention-aware lstm for action recognition. Neural Comput. Appl. 32(18), 14593\u201314602 (2020)","journal-title":"Neural Comput. Appl."},{"key":"1541_CR60","doi-asserted-by":"crossref","unstructured":"Sun, S., Kuang, Z., Sheng, L., Ouyang, W., Zhang, W.: Optical flow guided feature: a fast and robust motion representation for video action recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pages 1390\u20131399, (2018)","DOI":"10.1109\/CVPR.2018.00151"},{"key":"1541_CR61","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-cam: Visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE International Conference on Computer Vision, pages 618\u2013626, (2017)","DOI":"10.1109\/ICCV.2017.74"}],"container-title":["Journal of Real-Time Image Processing"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-024-01541-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11554-024-01541-6\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11554-024-01541-6.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2024,10,15]],"date-time":"2024-10-15T11:18:45Z","timestamp":1728991125000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11554-024-01541-6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,8,23]]},"references-count":61,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2024,10]]}},"alternative-id":["1541"],"URL":"https:\/\/doi.org\/10.1007\/s11554-024-01541-6","relation":{"has-preprint":[{"id-type":"doi","id":"10.21203\/rs.3.rs-4679346\/v1","asserted-by":"object"}]},"ISSN":["1861-8200","1861-8219"],"issn-type":[{"value":"1861-8200","type":"print"},{"value":"1861-8219","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,8,23]]},"assertion":[{"value":"3 July 2024","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 August 2024","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"23 August 2024","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"158"}}