{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,3,18]],"date-time":"2026-03-18T04:14:41Z","timestamp":1773807281234,"version":"3.50.1"},"reference-count":66,"publisher":"Springer Science and Business Media LLC","issue":"3","license":[{"start":{"date-parts":[[2024,6,8]],"date-time":"2024-06-08T00:00:00Z","timestamp":1717804800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2024,6,8]],"date-time":"2024-06-08T00:00:00Z","timestamp":1717804800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100005052","name":"Mission on Nano Science and Technology","doi-asserted-by":"publisher","award":["22JCQNJC01380, 23JCYBJC00360"],"award-info":[{"award-number":["22JCQNJC01380, 23JCYBJC00360"]}],"id":[{"id":"10.13039\/501100005052","id-type":"DOI","asserted-by":"publisher"}]},{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["42005066, 62376197, 62202332, 62020106004, 92048301"],"award-info":[{"award-number":["42005066, 62376197, 62202332, 62020106004, 92048301"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Vis Comput"],"published-print":{"date-parts":[[2025,2]]},"DOI":"10.1007\/s00371-024-03516-x","type":"journal-article","created":{"date-parts":[[2024,6,8]],"date-time":"2024-06-08T14:01:35Z","timestamp":1717855295000},"page":"1971-1986","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":12,"title":["Dual-stage temporal perception network for continuous sign language recognition"],"prefix":"10.1007","volume":"41","author":[{"given":"Zhigang","family":"Huang","sequence":"first","affiliation":[]},{"ORCID":"https:\/\/orcid.org\/0000-0002-6031-9334","authenticated-orcid":false,"given":"Wanli","family":"Xue","sequence":"additional","affiliation":[]},{"given":"Yuxi","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Jinlu","family":"Sun","sequence":"additional","affiliation":[]},{"given":"Yazhou","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Tiantian","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"Shengyong","family":"Chen","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2024,6,8]]},"reference":[{"key":"3516_CR1","doi-asserted-by":"publisher","first-page":"1750","DOI":"10.1109\/TMM.2021.3070438","volume":"24","author":"N Adaloglou","year":"2021","unstructured":"Adaloglou, N., Chatzis, T., Papastratis, I., Stergioulas, A., Papadopoulos, G.T., Zacharopoulou, V., Xydopoulos, G.J., Atzakas, K., Papazachariou, D., Daras, P.: A comprehensive study on deep learning-based methods for sign language recognition. IEEE Trans. Multimedia 24, 1750\u20131762 (2021)","journal-title":"IEEE Trans. Multimedia"},{"key":"3516_CR2","doi-asserted-by":"crossref","unstructured":"Li, H., Gao, L., Han, R., Wan, L., Feng, W.: Key action and joint ctc-attention based sign language recognition. In: ICASSP 2020-2020 IEEE international conference on acoustics, speech and signal processing (ICASSP), 2348\u20132352 (2020). IEEE","DOI":"10.1109\/ICASSP40776.2020.9054316"},{"issue":"3","key":"3516_CR3","doi-asserted-by":"publisher","first-page":"1138","DOI":"10.1109\/TCSVT.2020.2999384","volume":"31","author":"C Wei","year":"2020","unstructured":"Wei, C., Zhao, J., Zhou, W., Li, H.: Semantic boundary detection with reinforcement learning for continuous sign language recognition. IEEE Trans. Circuits Syst. Video Technol. 31(3), 1138\u20131149 (2020)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"1","key":"3516_CR4","doi-asserted-by":"publisher","first-page":"26","DOI":"10.1007\/s44267-023-00028-5","volume":"1","author":"W Xue","year":"2023","unstructured":"Xue, W., Liu, J., Yan, S., Zhou, Y., Yuan, T., Guo, Q.: Alleviating data insufficiency for chinese sign language recognition. Vis. Intell. 1(1), 26 (2023)","journal-title":"Vis. Intell."},{"key":"3516_CR5","doi-asserted-by":"crossref","unstructured":"Xue, W., Kang, Z., Guo, L., Yang, S., Yuan, T., Chen, S.: Continuous sign language recognition for hearing-impaired consumer communication via self-guidance network. IEEE Transactions on Consumer Electronics (2023)","DOI":"10.1109\/TCE.2023.3342163"},{"key":"3516_CR6","doi-asserted-by":"crossref","unstructured":"Min, Y., Hao, A., Chai, X., Chen, X.: Visual alignment constraint for continuous sign language recognition. In: Proceedings of the IEEE\/CVF international conference on computer vision, 11542\u201311551 (2021)","DOI":"10.1109\/ICCV48922.2021.01134"},{"key":"3516_CR7","doi-asserted-by":"crossref","unstructured":"Zhou, H., Zhou, W., Zhou, Y., Li, H.: Spatial-temporal multi-cue network for continuous sign language recognition. In: Proceedings of the AAAI conference on artificial intelligence, 34, 13009\u201313016 (2020)","DOI":"10.1609\/aaai.v34i07.7001"},{"key":"3516_CR8","doi-asserted-by":"crossref","unstructured":"Cihan\u00a0Camgoz, N., Hadfield, S., Koller, O., Bowden, R.: Subunets: End-to-end hand shape and continuous sign language recognition. In: Proceedings of the IEEE international conference on computer vision, 3056\u20133065 (2017)","DOI":"10.1109\/ICCV.2017.332"},{"key":"3516_CR9","doi-asserted-by":"crossref","unstructured":"Koller, O., Zargaran, S., Ney, H.: Re-sign: Re-aligned end-to-end sequence modelling with deep recurrent cnn-hmms. In: Proceedings of the IEEE conference on computer vision and pattern recognition, 4297\u20134305 (2017)","DOI":"10.1109\/CVPR.2017.364"},{"key":"3516_CR10","doi-asserted-by":"crossref","unstructured":"Niu, Z., Mak, B.: Stochastic fine-grained labeling of multi-state sign glosses for continuous sign language recognition. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XVI 16, 172\u2013186 (2020). Springer","DOI":"10.1007\/978-3-030-58517-4_11"},{"key":"3516_CR11","doi-asserted-by":"crossref","unstructured":"Pu, J., Zhou, W., Li, H.: Iterative alignment network for continuous sign language recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 4165\u20134174 (2019)","DOI":"10.1109\/CVPR.2019.00429"},{"key":"3516_CR12","doi-asserted-by":"crossref","unstructured":"Zhang, Z., Pu, J., Zhuang, L., Zhou, W., Li, H.: Continuous sign language recognition via reinforcement learning. In: 2019 IEEE international conference on image processing (ICIP), 285\u2013289 (2019). IEEE","DOI":"10.1109\/ICIP.2019.8802972"},{"key":"3516_CR13","doi-asserted-by":"crossref","unstructured":"Wang, S., Guo, D., Zhou, W.-g., Zha, Z.-J., Wang, M.: Connectionist temporal fusion for sign language translation. In: Proceedings of the 26th ACM international conference on multimedia, 1483\u20131491 (2018)","DOI":"10.1145\/3240508.3240671"},{"issue":"7","key":"3516_CR14","doi-asserted-by":"publisher","first-page":"1880","DOI":"10.1109\/TMM.2018.2889563","volume":"21","author":"R Cui","year":"2019","unstructured":"Cui, R., Liu, H., Zhang, C.: A deep neural framework for continuous sign language recognition by iterative training. IEEE Trans. Multimed. 21(7), 1880\u20131891 (2019)","journal-title":"IEEE Trans. Multimed."},{"key":"3516_CR15","doi-asserted-by":"crossref","unstructured":"Hao, A., Min, Y., Chen, X.: Self-mutual distillation learning for continuous sign language recognition. In: Proceedings of the IEEE\/CVF international conference on computer vision, 11303\u201311312 (2021)","DOI":"10.1109\/ICCV48922.2021.01111"},{"key":"3516_CR16","unstructured":"Hu, L., Gao, L., Feng, W., et al.: Self-emphasizing network for continuous sign language recognition. arXiv preprint arXiv:2211.17081 (2022)"},{"key":"3516_CR17","unstructured":"Yang, T., Zhang, H., Hu, W., Chen, C., Wang, X.: Fast-parc: Position aware global kernel for convnets and vits. arXiv preprint arXiv:2210.04020 (2022)"},{"key":"3516_CR18","doi-asserted-by":"crossref","unstructured":"Dai, R., Das, S., Kahatapitiya, K., Ryoo, M.S., Br\u00e9mond, F.: Ms-tct: multi-scale temporal convtransformer for action detection. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 20041\u201320051 (2022)","DOI":"10.1109\/CVPR52688.2022.01941"},{"key":"3516_CR19","doi-asserted-by":"crossref","unstructured":"Zhao, Q., Sheng, T., Wang, Y., Tang, Z., Chen, Y., Cai, L., Ling, H.: M2det: A single-shot object detector based on multi-level feature pyramid network. In: Proceedings of the AAAI conference on artificial intelligence, 33, 9259\u20139266 (2019)","DOI":"10.1609\/aaai.v33i01.33019259"},{"key":"3516_CR20","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1016\/j.cviu.2015.09.013","volume":"141","author":"O Koller","year":"2015","unstructured":"Koller, O., Forster, J., Ney, H.: Continuous sign language recognition: Towards large vocabulary statistical recognition systems handling multiple signers. Comput. Vis. Image Underst. 141, 108\u2013125 (2015)","journal-title":"Comput. Vis. Image Underst."},{"key":"3516_CR21","doi-asserted-by":"crossref","unstructured":"Cui, R., Liu, H., Zhang, C.: Recurrent convolutional neural networks for continuous sign language recognition by staged optimization. In: Proceedings of the IEEE conference on computer vision and pattern recognition, 7361\u20137369 (2017)","DOI":"10.1109\/CVPR.2017.175"},{"key":"3516_CR22","doi-asserted-by":"crossref","unstructured":"Zuo, R., Mak, B.: C2slr: Consistency-enhanced continuous sign language recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 5131\u20135140 (2022)","DOI":"10.1109\/CVPR52688.2022.00507"},{"key":"3516_CR23","doi-asserted-by":"crossref","unstructured":"Hu, L., Gao, L., Liu, Z., Feng, W.: Temporal lift pooling for continuous sign language recognition. In: Computer Vision\u2013ECCV 2022: 17th European Conference, Tel Aviv, Israel, October 23\u201327, 2022, Proceedings, Part XXXV, 511\u2013527 (2022). Springer","DOI":"10.1007\/978-3-031-19833-5_30"},{"key":"3516_CR24","doi-asserted-by":"crossref","unstructured":"Graves, A., Fern\u00e1ndez, S., Gomez, F., Schmidhuber, J.: Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks. In: Proceedings of the 23rd international conference on machine learning, 369\u2013376 (2006)","DOI":"10.1145\/1143844.1143891"},{"key":"3516_CR25","unstructured":"Radford, A., Narasimhan, K., Salimans, T., Sutskever, I., et al.: Improving language understanding by generative pre-training (2018)"},{"key":"3516_CR26","first-page":"15908","volume":"34","author":"K Han","year":"2021","unstructured":"Han, K., Xiao, A., Wu, E., Guo, J., Xu, C., Wang, Y.: Transformer in transformer. Adv. Neural. Inf. Process. Syst. 34, 15908\u201315919 (2021)","journal-title":"Adv. Neural. Inf. Process. Syst."},{"key":"3516_CR27","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.102043","volume":"102","author":"C Tian","year":"2024","unstructured":"Tian, C., Zheng, M., Zuo, W., Zhang, S., Zhang, Y., Lin, C.-W.: A cross transformer for image denoising. Inf. Fusion 102, 102043 (2024)","journal-title":"Inf. Fusion"},{"key":"3516_CR28","unstructured":"Li, S., Jin, X., Xuan, Y., Zhou, X., Chen, W., Wang, Y.-X., Yan, X.: Enhancing the locality and breaking the memory bottleneck of transformer on time series forecasting. Advances in neural information processing systems 32 (2019)"},{"key":"3516_CR29","doi-asserted-by":"crossref","unstructured":"Pu, J., Zhou, W., Li, H.: Dilated convolutional network with iterative optimization for continuous sign language recognition. In: IJCAI, 3, 7 (2018)","DOI":"10.24963\/ijcai.2018\/123"},{"key":"3516_CR30","doi-asserted-by":"crossref","unstructured":"Guo, D., Wang, S., Tian, Q., Wang, M.: Dense temporal convolution network for sign language translation. In: IJCAI, 744\u2013750 (2019)","DOI":"10.24963\/ijcai.2019\/105"},{"key":"3516_CR31","doi-asserted-by":"crossref","unstructured":"Zhou, H., Zhou, W., Li, H.: Dynamic pseudo label decoding for continuous sign language recognition. In: 2019 IEEE international conference on multimedia and expo (ICME), pp. 1282\u20131287 (2019). IEEE","DOI":"10.1109\/ICME.2019.00223"},{"key":"3516_CR32","doi-asserted-by":"crossref","unstructured":"Girdhar, R., Grauman, K.: Anticipative video transformer. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 13505\u201313515 (2021)","DOI":"10.1109\/ICCV48922.2021.01325"},{"key":"3516_CR33","doi-asserted-by":"crossref","unstructured":"Farha, Y.A., Gall, J.: Ms-tcn: Multi-stage temporal convolutional network for action segmentation. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, 3575\u20133584 (2019)","DOI":"10.1109\/CVPR.2019.00369"},{"key":"3516_CR34","doi-asserted-by":"crossref","unstructured":"Yang, C., Xu, Y., Shi, J., Dai, B., Zhou, B.: Temporal pyramid network for action recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 591\u2013600 (2020)","DOI":"10.1109\/CVPR42600.2020.00067"},{"key":"3516_CR35","doi-asserted-by":"crossref","unstructured":"Wang, L., Tong, Z., Ji, B., Wu, G.: Tdn: Temporal difference networks for efficient action recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 1895\u20131904 (2021)","DOI":"10.1109\/CVPR46437.2021.00193"},{"key":"3516_CR36","doi-asserted-by":"crossref","unstructured":"Dai, R., Das, S., Minciullo, L., Garattoni, L., Francesca, G., Bremond, F.: Pdan: Pyramid dilated attention network for action detection. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp. 2970\u20132979 (2021)","DOI":"10.1109\/WACV48630.2021.00301"},{"key":"3516_CR37","doi-asserted-by":"crossref","unstructured":"Wu, H., Xiao, B., Codella, N., Liu, M., Dai, X., Yuan, L., Zhang, L.: Cvt: Introducing convolutions to vision transformers. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp. 22\u201331 (2021)","DOI":"10.1109\/ICCV48922.2021.00009"},{"key":"3516_CR38","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Vanhoucke, V., Ioffe, S., Shlens, J., Wojna, Z.: Rethinking the inception architecture for computer vision. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 2818\u20132826 (2016)","DOI":"10.1109\/CVPR.2016.308"},{"key":"3516_CR39","doi-asserted-by":"crossref","unstructured":"Zhang, X., Zhou, X., Lin, M., Sun, J.: Shufflenet: An extremely efficient convolutional neural network for mobile devices. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 6848\u20136856 (2018)","DOI":"10.1109\/CVPR.2018.00716"},{"key":"3516_CR40","doi-asserted-by":"crossref","unstructured":"Chollet, F.: Xception: Deep learning with depthwise separable convolutions. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 1251\u20131258 (2017)","DOI":"10.1109\/CVPR.2017.195"},{"key":"3516_CR41","doi-asserted-by":"publisher","DOI":"10.1016\/j.inffus.2023.102033","volume":"102","author":"X Ning","year":"2024","unstructured":"Ning, X., Yu, Z., Li, L., Li, W., Tiwari, P.: Dilf: Differentiable rendering-based multi-view image-language fusion for zero-shot 3d shape understanding. Inf. Fusion 102, 102033 (2024)","journal-title":"Inf. Fusion"},{"issue":"9","key":"3516_CR42","doi-asserted-by":"publisher","first-page":"3391","DOI":"10.1109\/TCSVT.2020.3043026","volume":"31","author":"X Ning","year":"2020","unstructured":"Ning, X., Gong, K., Li, W., Zhang, L., Bai, X., Tian, S.: Feature refinement and filter network for person re-identification. IEEE Trans. Circuits Syst. Video Technol. 31(9), 3391\u20133402 (2020)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"key":"3516_CR43","doi-asserted-by":"crossref","unstructured":"Tian, C., Zhang, X., Zhang, Q., Yang, M., Ju, Z.: Image super-resolution via dynamic network. CAAI Transactions on Intelligence Technology (2023)","DOI":"10.1049\/cit2.12297"},{"key":"3516_CR44","doi-asserted-by":"crossref","unstructured":"Sandler, M., Howard, A., Zhu, M., Zhmoginov, A., Chen, L.-C.: Mobilenetv2: Inverted residuals and linear bottlenecks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 4510\u20134520 (2018)","DOI":"10.1109\/CVPR.2018.00474"},{"key":"3516_CR45","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 770\u2013778 (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"3516_CR46","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A.N., Kaiser, \u0141., Polosukhin, I.: Attention is all you need. Advances in neural information processing systems 30 (2017)"},{"key":"3516_CR47","unstructured":"Fu, L., Tian, H., Zhai, X.B., Gao, P., Peng, X.: Incepformer: Efficient inception transformer with pyramid pooling for semantic segmentation. arXiv preprint arXiv:2212.03035 (2022)"},{"key":"3516_CR48","doi-asserted-by":"crossref","unstructured":"Huang, J., Zhou, W., Zhang, Q., Li, H., Li, W.: Video-based sign language recognition without temporal segmentation. In: Proceedings of the AAAI conference on artificial intelligence, 32 (2018)","DOI":"10.1609\/aaai.v32i1.11903"},{"key":"3516_CR49","unstructured":"Dreuw, P., Neidle, C., Athitsos, V., Sclaroff, S., Ney, H.: Benchmark databases for video-based automatic sign language recognition. In: LREC (2008)"},{"key":"3516_CR50","doi-asserted-by":"crossref","unstructured":"Camgoz, N.C., Hadfield, S., Koller, O., Ney, H., Bowden, R.: Neural sign language translation. In: Proceedings of the IEEE conference on computer vision and pattern recognition, 7784\u20137793 (2018)","DOI":"10.1109\/CVPR.2018.00812"},{"key":"3516_CR51","unstructured":"Forster, J., Schmidt, C., Koller, O., Bellgardt, M., Ney, H.: Extensions of the sign language recognition and translation corpus rwth-phoenix-weather. In: LREC, pp. 1911\u20131916 (2014)"},{"key":"3516_CR52","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, L.: Imagenet: A large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition, pp. 248\u2013255 (2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"3516_CR53","unstructured":"Kingma, D.P., Ba, J.: Adam: A method for stochastic optimization. arXiv preprint arXiv:1412.6980 (2014)"},{"key":"3516_CR54","doi-asserted-by":"crossref","unstructured":"Pu, J., Zhou, W., Hu, H., Li, H.: Boosting continuous sign language recognition via cross modality augmentation. In: Proceedings of the 28th ACM international conference on multimedia, pp. 1497\u20131505 (2020)","DOI":"10.1145\/3394171.3413931"},{"key":"3516_CR55","doi-asserted-by":"crossref","unstructured":"Cheng, K.L., Yang, Z., Chen, Q., Tai, Y.-W.: Fully convolutional networks for continuous sign language recognition. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, UK, August 23\u201328, 2020, Proceedings, Part XXIV 16, pp. 697\u2013714 (2020). Springer","DOI":"10.1007\/978-3-030-58586-0_41"},{"key":"3516_CR56","unstructured":"Yang, Z., Shi, Z., Shen, X., Tai, Y.-W.: Sf-net: Structured feature network for continuous sign language recognition. arXiv preprint arXiv:1908.01341 (2019)"},{"key":"3516_CR57","doi-asserted-by":"crossref","unstructured":"Szegedy, C., Ioffe, S., Vanhoucke, V., Alemi, A.: Inception-v4, inception-resnet and the impact of residual connections on learning. In: Proceedings of the AAAI conference on artificial intelligence, vol. 31 (2017)","DOI":"10.1609\/aaai.v31i1.11231"},{"key":"3516_CR58","doi-asserted-by":"crossref","unstructured":"Dai, Y., Gieseke, F., Oehmcke, S., Wu, Y., Barnard, K.: Attentional feature fusion. In: Proceedings of the IEEE\/CVF winter conference on applications of computer vision, pp. 3560\u20133569 (2021)","DOI":"10.1109\/WACV48630.2021.00360"},{"key":"3516_CR59","doi-asserted-by":"crossref","unstructured":"Guo, L., Xue, W., Guo, Q., Liu, B., Zhang, K., Yuan, T., Chen, S.: Distilling cross-temporal contexts for continuous sign language recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 10771\u201310780 (2023)","DOI":"10.1109\/CVPR52729.2023.01037"},{"issue":"1","key":"3516_CR60","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1007\/s44267-024-00037-y","volume":"2","author":"W Zhao","year":"2024","unstructured":"Zhao, W., Xu, L.: Weakly supervised target detection based on spatial attention. Vis. Intell. 2(1), 1\u201311 (2024)","journal-title":"Vis. Intell."},{"issue":"1","key":"3516_CR61","doi-asserted-by":"publisher","first-page":"25","DOI":"10.1007\/s44267-023-00027-6","volume":"1","author":"Y Wang","year":"2023","unstructured":"Wang, Y., Cao, C., Zhang, Y.: Visual-semantic network: a visual and semantic enhanced model for gesture recognition. Vis. Intell. 1(1), 25 (2023)","journal-title":"Vis. Intell."},{"key":"3516_CR62","doi-asserted-by":"crossref","unstructured":"Selvaraju, R.R., Cogswell, M., Das, A., Vedantam, R., Parikh, D., Batra, D.: Grad-cam: Visual explanations from deep networks via gradient-based localization. In: Proceedings of the IEEE international conference on computer vision, pp. 618\u2013626 (2017)","DOI":"10.1109\/ICCV.2017.74"},{"key":"3516_CR63","doi-asserted-by":"crossref","unstructured":"Hu, J., Shen, L., Sun, G.: Squeeze-and-excitation networks. In: Proceedings of the IEEE conference on computer vision and pattern recognition, pp. 7132\u20137141 (2018)","DOI":"10.1109\/CVPR.2018.00745"},{"key":"3516_CR64","doi-asserted-by":"crossref","unstructured":"Woo, S., Park, J., Lee, J.-Y., Kweon, I.S.: Cbam: Convolutional block attention module. In: Proceedings of the European conference on computer vision (ECCV), pp. 3\u201319 (2018)","DOI":"10.1007\/978-3-030-01234-2_1"},{"key":"3516_CR65","doi-asserted-by":"crossref","unstructured":"Wang, Z., She, Q., Smolic, A.: Action-net: Multipath excitation for action recognition. In: Proceedings of the IEEE\/CVF conference on computer vision and pattern recognition, pp. 13214\u201313223 (2021)","DOI":"10.1109\/CVPR46437.2021.01301"},{"key":"3516_CR66","unstructured":"Liu, Y., Shao, Z., Teng, Y., Hoffmann, N.: Nam: Normalization-based attention module. arXiv preprint arXiv:2111.12419 (2021)"}],"container-title":["The Visual Computer"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03516-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00371-024-03516-x\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00371-024-03516-x.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,2,12]],"date-time":"2025-02-12T14:56:32Z","timestamp":1739372192000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00371-024-03516-x"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,6,8]]},"references-count":66,"journal-issue":{"issue":"3","published-print":{"date-parts":[[2025,2]]}},"alternative-id":["3516"],"URL":"https:\/\/doi.org\/10.1007\/s00371-024-03516-x","relation":{},"ISSN":["0178-2789","1432-2315"],"issn-type":[{"value":"0178-2789","type":"print"},{"value":"1432-2315","type":"electronic"}],"subject":[],"published":{"date-parts":[[2024,6,8]]},"assertion":[{"value":"24 April 2024","order":1,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"8 June 2024","order":2,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"There are no Conflict of interest.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}