{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,24]],"date-time":"2026-02-24T22:47:24Z","timestamp":1771973244954,"version":"3.50.1"},"reference-count":65,"publisher":"Springer Science and Business Media LLC","issue":"2","license":[{"start":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T00:00:00Z","timestamp":1715644800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T00:00:00Z","timestamp":1715644800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100012226","name":"Fundamental Research Funds for the Central Universities","doi-asserted-by":"publisher","award":["WHUTIOT2023-002"],"award-info":[{"award-number":["WHUTIOT2023-002"]}],"id":[{"id":"10.13039\/501100012226","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["62271361"],"award-info":[{"award-number":["62271361"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,1]]},"DOI":"10.1007\/s00371-024-03427-x","type":"journal-article","created":{"date-parts":[[2024,5,14]],"date-time":"2024-05-14T08:02:03Z","timestamp":1715673723000},"page":"1379-1394","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":8,"title":["Fragrant: frequency-auxiliary guided relational attention network for low-light action recognition"],"prefix":"10.1007","volume":"41","author":[{"given":"Wenxuan","family":"Liu","sequence":"first","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xuemei","family":"Jia","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yihao","family":"Ju","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Yakun","family":"Ju","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Kui","family":"Jiang","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Shifeng","family":"Wu","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Luo","family":"Zhong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Xian","family":"Zhong","sequence":"additional","affiliation":[],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"297","published-online":{"date-parts":[[2024,5,14]]},"reference":[{"key":"3427_CR1","doi-asserted-by":"crossref","unstructured":"Hu, M., Jiang, K., Liao, L., Xiao, J., Jiang, J., Wang, Z.: Spatial-temporal space hand-in-hand: Spatial-temporal video super-resolution via cycle-projected mutual learning. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 3564\u20133573 (2022)","DOI":"10.1109\/CVPR52688.2022.00356"},{"issue":"7","key":"3427_CR2","doi-asserted-by":"publisher","first-page":"6662","DOI":"10.1109\/TCYB.2021.3079311","volume":"52","author":"B Sheng","year":"2022","unstructured":"Sheng, B., Li, P., Ali, R., Chen, C.L.P.: Improving video temporal consistency via broad learning system. IEEE Trans. Cybern. 52(7), 6662\u20136675 (2022)","journal-title":"IEEE Trans. Cybern."},{"issue":"5","key":"3427_CR3","doi-asserted-by":"publisher","first-page":"2774","DOI":"10.1109\/TSMC.2019.2916896","volume":"51","author":"A Kamel","year":"2021","unstructured":"Kamel, A., Sheng, B., Li, P., Kim, J., Feng, D.D.: Efficient body motion quantification and similarity evaluation using 3-d joints skeleton coordinates. IEEE Trans. Syst. Man Cybern. Syst. 51(5), 2774\u20132788 (2021)","journal-title":"IEEE Trans. Syst. Man Cybern. Syst."},{"issue":"3","key":"3427_CR4","doi-asserted-by":"publisher","first-page":"105","DOI":"10.1145\/3565886","volume":"19","author":"W Huang","year":"2023","unstructured":"Huang, W., Jia, X., Zhong, X., Wang, X., Jiang, K., Wang, Z.: Beyond the parts: learning coarse-to-fine adaptive alignment representation for person search. ACM Trans. Multimedia Comput. Commun. Appl. 19(3), 105\u2013110519 (2023)","journal-title":"ACM Trans. Multimedia Comput. Commun. Appl."},{"key":"3427_CR5","doi-asserted-by":"publisher","first-page":"2719","DOI":"10.1109\/TIP.2023.3273459","volume":"32","author":"W Liu","year":"2023","unstructured":"Liu, W., Zhong, X., Zhou, Z., Jiang, K., Wang, Z., Lin, C.: Dual-recommendation disentanglement network for view fuzz in action recognition. IEEE Trans. Image Process. 32, 2719\u20132733 (2023)","journal-title":"IEEE Trans. Image Process."},{"key":"3427_CR6","doi-asserted-by":"crossref","unstructured":"Feichtenhofer, C., Fan, H., Malik, J., He, K.: SlowFast networks for video recognition. In: Proceedings of IEEE\/CVF International Conference on Computer Vision, pp. 6201\u20136210 (2019)","DOI":"10.1109\/ICCV.2019.00630"},{"key":"3427_CR7","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.102043","volume":"102","author":"C Tian","year":"2024","unstructured":"Tian, C., Zheng, M., Zuo, W., Zhang, S., Zhang, Y., Lin, C.: A cross transformer for image denoising. Inf. Fus. 102, 102043 (2024)","journal-title":"Inf. Fus."},{"key":"3427_CR8","doi-asserted-by":"crossref","unstructured":"Zhong, X., Tu, S., Ma, X., Jiang, K., Huang, W., Wang, Z.: Rainy WCity: A real rainfall dataset with diverse conditions for semantic driving scene understanding. In: Proceedings of International Joint Conferences on Artificial Intelligence, pp. 1743\u20131749 (2022)","DOI":"10.24963\/ijcai.2022\/243"},{"key":"3427_CR9","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.101822","volume":"97","author":"B Zhang","year":"2023","unstructured":"Zhang, B., Suo, J., Dai, Q.: A complementary dual-backbone transformer extracting and fusing weak cues for object detection in extremely dark videos. Inf. Fus. 97, 101822 (2023)","journal-title":"Inf. Fus."},{"key":"3427_CR10","doi-asserted-by":"crossref","unstructured":"Wang, H., Schmid, C.: Action recognition with improved trajectories. In: Proceedings of IEEE\/CVF International Conference on Computer Vision, pp. 3551\u20133558 (2013)","DOI":"10.1109\/ICCV.2013.441"},{"key":"3427_CR11","unstructured":"Pan, Z., Cai, J., Zhuang, B.: Fast vision transformers with Hilo attention. Adv. Neural Inf. Process. Syst. (2022)"},{"key":"3427_CR12","doi-asserted-by":"publisher","first-page":"420","DOI":"10.1109\/TASSP.1974.1162620","volume":"22","author":"H Buijs","year":"1974","unstructured":"Buijs, H., Pomerleau, A., Fournier, M., Tam, W.Y.: Implementation of a fast Fourier transform (fft) for image processing applications. IEEE Trans. Acoust. Speech Signal Process. 22, 420\u2013424 (1974)","journal-title":"IEEE Trans. Acoust. Speech Signal Process."},{"issue":"3","key":"3427_CR13","doi-asserted-by":"publisher","first-page":"355","DOI":"10.1016\/S0734-189X(87)80186-X","volume":"39","author":"SM Pizer","year":"1987","unstructured":"Pizer, S.M., Amburn, E.P., Austin, J.D., Cromartie, R., Zuiderveld, K.: Adaptive histogram equalization and its variations. Comput. Vis. Graph. Image Process. 39(3), 355\u2013368 (1987)","journal-title":"Comput. Vis. Graph. Image Process."},{"issue":"2\u20133","key":"3427_CR14","doi-asserted-by":"publisher","first-page":"107","DOI":"10.1007\/s11263-005-1838-7","volume":"64","author":"I Laptev","year":"2005","unstructured":"Laptev, I.: On space-time interest points. Int. J. Comput. Vis. 64(2\u20133), 107\u2013123 (2005)","journal-title":"Int. J. Comput. Vis."},{"key":"3427_CR15","doi-asserted-by":"publisher","first-page":"36","DOI":"10.1016\/j.neucom.2016.05.094","volume":"212","author":"C Gao","year":"2016","unstructured":"Gao, C., Du, Y., Liu, J., Lv, J., Yang, L., Meng, D., Hauptmann, A.G.: InfAR dataset: infrared action recognition at different times. Neurocomputing 212, 36\u201347 (2016)","journal-title":"Neurocomputing"},{"key":"3427_CR16","doi-asserted-by":"crossref","unstructured":"Jiang, Z., Rozgic, V., Adali, S.: Learning spatiotemporal features for infrared action recognition with 3D convolutional neural networks. In: Proceedings of IEEE \/ CVF Computer Vision and Pattern Recognition Conference Workshops, pp. 309\u2013317 (2017)","DOI":"10.1109\/CVPRW.2017.44"},{"key":"3427_CR17","doi-asserted-by":"crossref","unstructured":"de la Riva, M., Mettes, P.: Bayesian 3D convnets for action recognition from few examples. In: Proceedings of IEEE\/CVF International Conference on Computer Vision Workshops, pp. 1337\u20131343 (2019)","DOI":"10.1109\/ICCVW.2019.00169"},{"key":"3427_CR18","doi-asserted-by":"crossref","unstructured":"Xu, L., Zhong, X., Liu, W., Zhao, S., Yang, Z., Zhong, L.: Subspace enhancement and colorization network for infrared video action recognition. In: Proceedings of Pacific Rim International Conference on Artificial Intelligence, pp. 321\u2013336 (2021)","DOI":"10.1007\/978-3-030-89370-5_24"},{"key":"3427_CR19","unstructured":"Li, K., Wang, Y., Gao, P., Song, G., Liu, Y., Li, H., Qiao, Y.: UniFormer: Unified transformer for efficient spatial-temporal representation learning. In: Proceedings of International Conference on Learning Representations (2022)"},{"key":"3427_CR20","doi-asserted-by":"crossref","unstructured":"Li, K., Wang, Y., He, Y., Li, Y., Wang, Y., Wang, L., Qiao, Y.: UniFormerV2: Unlocking the potential of image vits for video understanding. In: Proceedings of IEEE\/CVF International Conference on Computer Vision, pp. 1632\u20131643 (2023)","DOI":"10.1109\/ICCV51070.2023.00157"},{"key":"3427_CR21","doi-asserted-by":"crossref","unstructured":"Lavin, A., Gray, S.: Fast algorithms for convolutional neural networks. In: Proceedings of IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp. 4013\u20134021 (2016)","DOI":"10.1109\/CVPR.2016.435"},{"key":"3427_CR22","doi-asserted-by":"crossref","unstructured":"Shchekotov, I., Andreev, P.K., Ivanov, O., Alanov, A., Vetrov, D.: FFC-SE: Fast Fourier convolution for speech enhancement. In: Proceedings of International Speech Communication Association, pp. 1188\u20131192 (2022)","DOI":"10.21437\/Interspeech.2022-603"},{"key":"3427_CR23","unstructured":"Tancik, M., Srinivasan, P.P., Mildenhall, B., Fridovich-Keil, S., Raghavan, N., Singhal, U., Ramamoorthi, R., Barron, J.T., Ng, R.: Fourier features let networks learn high frequency functions in low dimensional domains. Adv. Neural Inf. Process. Syst. (2020)"},{"key":"3427_CR24","doi-asserted-by":"crossref","unstructured":"Xu, K., Qin, M., Sun, F., Wang, Y., Chen, Y., Ren, F.: Learning in the frequency domain. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 1737\u20131746 (2020)","DOI":"10.1109\/CVPR42600.2020.00181"},{"key":"3427_CR25","doi-asserted-by":"crossref","unstructured":"Kothandaraman, D., Guan, T., Wang, X., Hu, S., Lin, M.C., Manocha, D.: FAR: Fourier aerial video recognition. In: Proceedings of European Conference on Computer Vision, pp. 657\u2013676 (2022)","DOI":"10.1007\/978-3-031-19836-6_37"},{"issue":"4","key":"3427_CR26","first-page":"1363","volume":"39","author":"S Guo","year":"2023","unstructured":"Guo, S., Wang, W., Wang, X., Xu, X.: Low-light image enhancement with joint illumination and noise data distribution transformation. Vis. Comput. 39(4), 1363\u20131374 (2023)","journal-title":"Vis. Comput."},{"issue":"12","key":"3427_CR27","doi-asserted-by":"publisher","first-page":"3025","DOI":"10.1109\/TMM.2020.2969790","volume":"22","author":"S Hao","year":"2020","unstructured":"Hao, S., Han, X., Guo, Y., Xu, X., Wang, M.: Low-light image enhancement with semi-decoupled decomposition. IEEE Trans. Multimed. 22(12), 3025\u20133038 (2020)","journal-title":"IEEE Trans. Multimed."},{"issue":"2","key":"3427_CR28","doi-asserted-by":"publisher","first-page":"1332","DOI":"10.1109\/TVCG.2018.2869326","volume":"26","author":"B Sheng","year":"2020","unstructured":"Sheng, B., Li, P., Jin, Y., Tan, P., Lee, T.: Intrinsic image decomposition with step and drift shading separation. IEEE Trans. Vis. Comput. Graph. 26(2), 1332\u20131346 (2020)","journal-title":"IEEE Trans. Vis. Comput. Graph."},{"issue":"4","key":"3427_CR29","doi-asserted-by":"publisher","first-page":"118","DOI":"10.1145\/3072959.3073592","volume":"36","author":"M Gharbi","year":"2017","unstructured":"Gharbi, M., Chen, J., Barron, J.T., Hasinoff, S.W., Durand, F.: Deep bilateral learning for real-time image enhancement. ACM Trans. Graph. 36(4), 118\u2013111812 (2017)","journal-title":"ACM Trans. Graph."},{"issue":"4","key":"3427_CR30","doi-asserted-by":"publisher","first-page":"1153","DOI":"10.1007\/s11263-020-01418-8","volume":"129","author":"J Liu","year":"2021","unstructured":"Liu, J., Xu, D., Yang, W., Fan, M., Huang, H.: Benchmarking low-light image enhancement and beyond. Int. J. Comput. Vis. 129(4), 1153\u20131184 (2021)","journal-title":"Int. J. Comput. Vis."},{"key":"3427_CR31","doi-asserted-by":"crossref","unstructured":"Jiang, K., Wang, Z., Wang, Z., Chen, C., Yi, P., Lu, T., Lin, C.: Degrade is upgrade: Learning degradation for low-light image enhancement. In: Proceedings of AAAI Conference on Artificial Intelligence, pp. 1078\u20131086 (2022)","DOI":"10.1609\/aaai.v36i1.19992"},{"key":"3427_CR32","doi-asserted-by":"crossref","unstructured":"Wang, T., Zhang, K., Shen, T., Luo, W., Stenger, B., Lu, T.: Ultra-high-definition low-light image enhancement: A benchmark and transformer-based method. In: Proceedings of AAAI Conference on Artificial Intelligence (2023)","DOI":"10.1609\/aaai.v37i3.25364"},{"key":"3427_CR33","doi-asserted-by":"crossref","unstructured":"Hira, S., Das, R., Modi, A., Pakhomov, D.: Delta sampling R-BERT for limited data and low-light action recognition. In: Proceedings of IEEE\/CVF Computer Vision and Pattern Recognition, pp. 853\u2013862 (2021)","DOI":"10.1109\/CVPRW53098.2021.00095"},{"key":"3427_CR34","unstructured":"Zeng, J.: Indgic: supervised action recognition under low illumination. arXiv:2308.15345 (2023)"},{"key":"3427_CR35","unstructured":"Lv, F., Lu, F., Wu, J., Lim, C.: MBLLEN: low-light image\/video enhancement using CNNs. In: Proceedings of British Machine Vision Conference, p. 220 (2018)"},{"key":"3427_CR36","doi-asserted-by":"crossref","unstructured":"Jiang, H., Zheng, Y.: Learning to see moving objects in the dark. In: Proceedings of IEEE\/CVF International Conference on Computer Vision, pp. 7323\u20137332 (2019)","DOI":"10.1109\/ICCV.2019.00742"},{"issue":"4","key":"3427_CR37","doi-asserted-by":"publisher","first-page":"955","DOI":"10.1109\/TCSVT.2019.2901629","volume":"30","author":"B Sheng","year":"2020","unstructured":"Sheng, B., Li, P., Fang, X., Tan, P., Wu, E.: Depth-aware motion deblurring using loopy belief propagation. IEEE Trans. Circuits Syst. Video Technol. 30(4), 955\u2013969 (2020)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"3427_CR38","doi-asserted-by":"crossref","unstructured":"Zhang, F., Li, Y., You, S., Fu, Y.: Learning temporal consistency for low light video enhancement from single images. In: proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4967\u20134976 (2021)","DOI":"10.1109\/CVPR46437.2021.00493"},{"key":"3427_CR39","unstructured":"Huang, S., Wang, M., Zheng, X., Chen, J., Tang, C.: Hierarchical and dynamic graph attention network for drug-disease association prediction. IEEE J. Biomed. Health Inform. 1\u201312 (2024)"},{"issue":"2","key":"3427_CR40","doi-asserted-by":"publisher","first-page":"955","DOI":"10.1109\/TPAMI.2020.3014629","volume":"44","author":"C Tang","year":"2022","unstructured":"Tang, C., Liu, X., Zheng, X., Li, W., Xiong, J., Wang, L., Zomaya, A.Y., Longo, A.: Defusionnet: defocus blur detection via recurrently fusing and refining discriminative multi-scale deep features. IEEE Trans. Pattern Anal. Mach. Intell. 44(2), 955\u2013968 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3427_CR41","unstructured":"Kim, M., Kwon, H., Wang, C., Kwak, S., Cho, M.: Relational self-attention: What\u2019s missing in attention for video understanding. In: Advances in Neural Information Processing Systems, pp. 8046\u20138059 (2021)"},{"key":"3427_CR42","doi-asserted-by":"crossref","unstructured":"Li, D., Hu, J., Wang, C., Li, X., She, Q., Zhu, L., Zhang, T., Chen, Q.: Involution: Inverting the inherence of convolution for visual recognition. In: IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 12321\u201312330 (2021)","DOI":"10.1109\/CVPR46437.2021.01214"},{"key":"3427_CR43","doi-asserted-by":"crossref","unstructured":"Kuehne, H., Jhuang, H., Garrote, E., Poggio, T.A., Serre, T.: HMDB: A large video database for human motion recognition. In: Proceedings of IEEE\/CVF International Conference on Computer Vision, pp. 2556\u20132563 (2011)","DOI":"10.1109\/ICCV.2011.6126543"},{"key":"3427_CR44","doi-asserted-by":"publisher","first-page":"3507","DOI":"10.1109\/TIP.2023.3286254","volume":"32","author":"Z Tu","year":"2023","unstructured":"Tu, Z., Liu, Y., Zhang, Y., Mu, Q., Yuan, J.: DTCM: joint optimization of dark enhancement and action recognition in videos. IEEE Trans. Image Process. 32, 3507\u20133520 (2023)","journal-title":"IEEE Trans. Image Process."},{"key":"3427_CR45","doi-asserted-by":"crossref","unstructured":"Carreira, J., Zisserman, A.: Quo vadis, action recognition? A new model and the kinetics dataset. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp. 4724\u20134733 (2017)","DOI":"10.1109\/CVPR.2017.502"},{"key":"3427_CR46","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. In: Bengio, Y., LeCun, Y. (eds.) Proceedings of International Conference on Learning Representations (2015)"},{"issue":"4","key":"3427_CR47","first-page":"2140","volume":"44","author":"X Long","year":"2022","unstructured":"Long, X., de Melo, G., He, D., Li, F., Chi, Z., Wen, S., Gan, C.: Purely attention based local feature integration for video classification. IEEE Trans. Pattern Anal. Mach. Intell. 44(4), 2140\u20132154 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3427_CR48","doi-asserted-by":"publisher","first-page":"288","DOI":"10.1109\/TMM.2021.3050069","volume":"24","author":"X Chen","year":"2021","unstructured":"Chen, X., Gao, C., Li, C., Yang, Y., Meng, D.: Infrared action detection in the dark via cross-stream attention mechanism. IEEE Trans. Multimedia 24, 288\u2013300 (2021)","journal-title":"IEEE Trans. Multimedia"},{"key":"3427_CR49","doi-asserted-by":"crossref","unstructured":"Munsif, M., Khan, S.U., Khan, N., Baik, S.W.: Attention-based deep learning framework for action recognition in a dark environment. Hum. Cent. Comput. Inf. Sci. 14 (2024)","DOI":"10.1016\/j.knosys.2024.112480"},{"key":"3427_CR50","doi-asserted-by":"crossref","unstructured":"Li, J., Wei, P., Zhang, Y., Zheng, N.: A slow-i-fast-p architecture for compressed video action recognition. In: Proceedings of ACM Multimedia, pp. 2039\u20132047 (2020)","DOI":"10.1145\/3394171.3413641"},{"key":"3427_CR51","doi-asserted-by":"crossref","unstructured":"Li, Y., Ji, B., Shi, X., Zhang, J., Kang, B., Wang, L.: TEA: Temporal excitation and aggregation for action recognition. In: Proceedings of IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp. 906\u2013915 (2020)","DOI":"10.1109\/CVPR42600.2020.00099"},{"key":"3427_CR52","doi-asserted-by":"crossref","unstructured":"Li, X., Shuai, B., Tighe, J.: Directional temporal modeling for action recognition. In: Proceedings of European Conference on Computer Vision, pp. 275\u2013291 (2020)","DOI":"10.1007\/978-3-030-58539-6_17"},{"key":"3427_CR53","unstructured":"Bertasius, G., Wang, H., Torresani, L.: Is space-time attention all you need for video understanding? In: Proceedings of International Conference on Machine Learning, pp. 813\u2013824 (2021)"},{"key":"3427_CR54","unstructured":"Li, K., Li, X., Wang, Y., Wang, J., Qiao, Y.: CT-Net: Channel tensorization network for video classification. In: Proceedings of International Conference Learning Representation, pp. 1\u201313 (2021)"},{"key":"3427_CR55","doi-asserted-by":"crossref","unstructured":"Gowda, S.N., Rohrbach, M., Sevilla-Lara, L.: SMART frame selection for action recognition. In: Proceedings AAAI Conference on Artificial Intelligence, pp. 1451\u20131459 (2021)","DOI":"10.1609\/aaai.v35i2.16235"},{"key":"3427_CR56","doi-asserted-by":"crossref","unstructured":"Tian, Y., Zhong, X., Liu, W., Jia, X., Zhao, S., Ye, M.: Random walk erasing with attention calibration for action recognition. In: Proceedings of Pacific Rim International Conference on Artificial Intelligence, pp. 236\u2013251 (2021)","DOI":"10.1007\/978-3-030-89370-5_18"},{"key":"3427_CR57","doi-asserted-by":"crossref","unstructured":"Li, K., Zhang, Z., Wu, G., Xiong, X., Lee, C., Lu, Z., Fu, Y., Pfister, T.: Learning from weakly-labeled web videos via exploring sub-concepts. In: Proceedings of AAAI Conference on Artificial Intelligence, pp. 1341\u20131349 (2022)","DOI":"10.1609\/aaai.v36i2.20022"},{"issue":"5","key":"3427_CR58","first-page":"2760","volume":"44","author":"J Lin","year":"2022","unstructured":"Lin, J., Gan, C., Wang, K., Han, S.: TSM: temporal shift module for efficient and scalable video understanding on edge devices. IEEE Trans. Pattern Anal. Mach. Intell. 44(5), 2760\u20132774 (2022)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3427_CR59","doi-asserted-by":"crossref","unstructured":"Luo, H., Lin, G., Yao, Y., Tang, Z., Wu, Q., Hua, X.: Dense semantics-assisted networks for video action recognition. IEEE Trans. Circuits Syst. Video Technol. 32(5), 3073\u20133084 (2022)","DOI":"10.1109\/TCSVT.2021.3100842"},{"key":"3427_CR60","doi-asserted-by":"publisher","first-page":"231","DOI":"10.1016\/j.neucom.2022.10.037","volume":"516","author":"S Alfasly","year":"2023","unstructured":"Alfasly, S., Lu, J., Xu, C., Al-Huda, Z., Jiang, Q., Lu, Z., Chui, C.K.: FastPicker: adaptive independent two-stage video-to-video summarization for efficient action recognition. Neurocomputing 516, 231\u2013244 (2023)","journal-title":"Neurocomputing"},{"issue":"3","key":"3427_CR61","doi-asserted-by":"publisher","first-page":"977","DOI":"10.1109\/TCSVT.2022.3207518","volume":"33","author":"X Sheng","year":"2023","unstructured":"Sheng, X., Li, K., Shen, Z., Xiao, G.: A progressive difference method for capturing visual tempos on action recognition. IEEE Trans. Circuits Syst. Video Technol. 33(3), 977\u2013987 (2023)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"3","key":"3427_CR62","first-page":"3347","volume":"45","author":"M Wang","year":"2023","unstructured":"Wang, M., Xing, J., Su, J., Chen, J., Liu, Y.: Learning spatiotemporal and motion features in a unified 2d network for action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 45(3), 3347\u20133362 (2023)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"3427_CR63","doi-asserted-by":"crossref","unstructured":"Rasheed, H.A., Khattak, M.U., Maaz, M., Khan, S.H., Khan, F.S.: Fine-tuned CLIP models are efficient video learners. In: Proceedings of IEEE\/CVF Computer Vision and Pattern Recognition Conference, pp. 6545\u20136554 (2023)","DOI":"10.1109\/CVPR52729.2023.00633"},{"key":"3427_CR64","doi-asserted-by":"crossref","unstructured":"Yang, C., Xu, Y., Shi, J., Dai, B., Zhou, B.: Temporal pyramid network for action recognition. In: Proceedings of IEEE\/CVF Conference on Computer Vision Pattern Recognition (2020)","DOI":"10.1109\/CVPR42600.2020.00067"},{"key":"3427_CR65","unstructured":"Yang, T., Zhu, Y., Xie, Y., Zhang, A., Chen, C., Li, M.: AIM: adapting image models for efficient video action recognition. In: Proceedings of International Conference on Learning Representation (2023)"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03427-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-024-03427-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03427-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,3]],"date-time":"2025-02-03T12:38:52Z","timestamp":1738586332000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-024-03427-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,5,14]]},"references-count":65,"journal-issue":{"issue":"2","published-print":{"date-parts":[[2025,1]]}},"alternative-id":["3427"],"URL":"https:\/\/doi.org\/10.1007\/s00371-024-03427-x","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,5,14]]},"assertion":[{"value":"22 April 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"14 May 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"The authors declare that there is no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}