{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,16]],"date-time":"2025-12-16T12:42:44Z","timestamp":1765888964083,"version":"3.37.3"},"reference-count":37,"publisher":"Springer Science and Business Media LLC","issue":"5","license":[{"start":{"date-parts":[[2022,8,24]],"date-time":"2022-08-24T00:00:00Z","timestamp":1661299200000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"},{"start":{"date-parts":[[2022,8,24]],"date-time":"2022-08-24T00:00:00Z","timestamp":1661299200000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springer.com\/tdm"}],"funder":[{"DOI":"10.13039\/501100001809","name":"National Natural Science Foundation of China","doi-asserted-by":"publisher","award":["61973334"],"award-info":[{"award-number":["61973334"]}],"id":[{"id":"10.13039\/501100001809","id-type":"DOI","asserted-by":"publisher"}]},{"name":"Research Center of Security Video and Image Processing Engineering Technology of Guizhou","award":["[2020]001]"],"award-info":[{"award-number":["[2020]001]"]}]},{"DOI":"10.13039\/501100012237","name":"Beijing Advanced Innovation Center for Intelligent Robots and Systems, Beijing Institute of Technology","doi-asserted-by":"publisher","award":["2018IRS20"],"award-info":[{"award-number":["2018IRS20"]}],"id":[{"id":"10.13039\/501100012237","id-type":"DOI","asserted-by":"publisher"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Machine Vision and Applications"],"published-print":{"date-parts":[[2022,9]]},"DOI":"10.1007\/s00138-022-01330-w","type":"journal-article","created":{"date-parts":[[2022,8,24]],"date-time":"2022-08-24T08:04:04Z","timestamp":1661328244000},"update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":3,"title":["Two-stream lightweight sign language transformer"],"prefix":"10.1007","volume":"33","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8253-9373","authenticated-orcid":false,"given":"Yuming","family":"Chen","sequence":"first","affiliation":[]},{"given":"Xue","family":"Mei","sequence":"additional","affiliation":[]},{"given":"Xuan","family":"Qin","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2022,8,24]]},"reference":[{"issue":"1","key":"1330_CR1","doi-asserted-by":"publisher","first-page":"221","DOI":"10.1109\/TPAMI.2012.59","volume":"35","author":"S Ji","year":"2013","unstructured":"Ji, S., Xu, W., Yang, M.: 3D convolutional neural networks for human action recognition. IEEE Trans. Pattern Anal. Mach. Intell. 35(1), 221\u2013231 (2013)","journal-title":"IEEE Trans. Pattern Anal. Mach. Intell."},{"key":"1330_CR2","doi-asserted-by":"crossref","unstructured":"Barros, P., Magg, S., Weber, C.: A multichannel convolutional neural network for hand posture recognition. In: International Conference on Artificial Neural Networks. Springer (2014)","DOI":"10.1007\/978-3-319-11179-7_51"},{"key":"1330_CR3","doi-asserted-by":"crossref","unstructured":"Zhang, J., Zhou, W., Li, H.: A threshold-based HMM-DTW approach for continuous sign language recognition. ACM (2014)","DOI":"10.1145\/2632856.2632931"},{"key":"1330_CR4","doi-asserted-by":"crossref","unstructured":"Koller, O., Zargaran, S., Ney, H.: Re-sign: re-aligned end-to-end sequence modelling with deep recurrent CNN-HMMs. In: IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017)","DOI":"10.1109\/CVPR.2017.364"},{"key":"1330_CR5","first-page":"1","volume":"32","author":"J Huang","year":"2018","unstructured":"Huang, J., Zhou, W., Li, H.: Attention-based 3D-CNNs for large-vocabulary sign language recognition. IEEE Trans. Circuits Syst. Video Technol. 32, 1 (2018)","journal-title":"IEEE Trans. Circuits Syst. Video Technol."},{"issue":"7","key":"1330_CR6","first-page":"1880","volume":"21","author":"R Cui","year":"2019","unstructured":"Cui, R., Liu, H., Zhang, C.: A deep neural framework for continuous sign language recognition by iterative training. TMM 21(7), 1880\u20131891 (2019)","journal-title":"TMM"},{"key":"1330_CR7","doi-asserted-by":"crossref","unstructured":"Buehler, P., Zisserman, A., Everingham, M.: Learning sign language by watching TV (using weakly aligned subtitles). In: CVPR (2009)","DOI":"10.1109\/CVPR.2009.5206523"},{"key":"1330_CR8","doi-asserted-by":"crossref","unstructured":"Pfister, T., Charles, J., Zisserman, A.: Large-scale learning of sign language by watching TV (using co-occurrences). In: BMVC (2013)","DOI":"10.5244\/C.27.20"},{"key":"1330_CR9","first-page":"108","volume":"141","author":"O Koller","year":"2015","unstructured":"Koller, O., Forster, J., Ney, H.: Continuous sign language recognition: towards large vocabulary statistical recognition systems handling multiple signers. CVIU 141, 108\u2013125 (2015)","journal-title":"CVIU"},{"key":"1330_CR10","doi-asserted-by":"crossref","unstructured":"Cihan Camgoz, N., Hadfield, S., Koller, O., Bowden, R.: SubUNets: end-to-end hand shape and continuous sign language recognition. In: ICCV (2017)","DOI":"10.1109\/ICCV.2017.332"},{"key":"1330_CR11","doi-asserted-by":"crossref","unstructured":"Camgoz, N.C., Hadfield, S., Koller, O., Ney, H., Bowden, R.: Neural sign language translation. In: Proceedings of IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2018), pp. 7784\u20137793 (2018)","DOI":"10.1109\/CVPR.2018.00812"},{"issue":"13","key":"1330_CR12","doi-asserted-by":"publisher","first-page":"2683","DOI":"10.3390\/app9132683","volume":"9","author":"SK Ko","year":"2019","unstructured":"Ko, S.K., Kim, C.J., Jung, H., et al.: Neural sign language translation based on human key-point estimation. Appl. Sci. 9(13), 2683 (2019). https:\/\/doi.org\/10.3390\/app9132683","journal-title":"Appl. Sci."},{"key":"1330_CR13","unstructured":"Camgoz, N.C., Koller, O., Hadfield, S., Bowden, R.: Sign language transformers: joint end-to-end sign language recognition and translation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR2020) (2020)"},{"key":"1330_CR14","doi-asserted-by":"crossref","unstructured":"Zhou, H., Zhou, W., Zhou, Y., Li, H.: Spatial-temporal multi-cue network for continuous sign language recognition. In: The Thirty-Fourth AAAI Conference on Artificial Intelligence (AAAI 2020), pp. 13009\u201313016 (2020)","DOI":"10.1609\/aaai.v34i07.7001"},{"key":"1330_CR15","doi-asserted-by":"crossref","unstructured":"Orbay, A., Akarun, L.: Neural sign language translation by learning tokenization. arXiv:2002.00479 (2020)","DOI":"10.1109\/FG47880.2020.00002"},{"issue":"3","key":"1330_CR16","first-page":"80:1","volume":"17","author":"H HU","year":"2021","unstructured":"HU, H., ZHOU, W.G., PU, J.F., LI, H.Q.: Global-local enhancement network for NMF-aware sign language recognition. ACM Trans. Multimedia Comput. Commun. Appl. TOMM 17(3), 80:1-80:19 (2021)","journal-title":"ACM Trans. Multimedia Comput. Commun. Appl. TOMM"},{"key":"1330_CR17","doi-asserted-by":"publisher","DOI":"10.1109\/TMM.2021.3117124","author":"SG Tang","year":"2021","unstructured":"Tang, S.G., Guo, D., Richang, H., Wang, M.: Graph-based multimodal sequential embedding for sign language translation. IEEE Trans. Multimedia (2021). https:\/\/doi.org\/10.1109\/TMM.2021.3117124","journal-title":"IEEE Trans. Multimedia"},{"key":"1330_CR18","doi-asserted-by":"publisher","first-page":"10","DOI":"10.1109\/ACCESS.2021.3132668","volume":"9","author":"ZX Zhou","year":"2021","unstructured":"Zhou, Z.X., Tam, V.W.L., Lam, E.Y.: SignBERT: a BERT-based deep learning framework for continuous sign language recognition. IEEE Access 9, 10 (2021). https:\/\/doi.org\/10.1109\/ACCESS.2021.3132668","journal-title":"IEEE Access"},{"issue":"3","key":"1330_CR19","doi-asserted-by":"publisher","first-page":"1138","DOI":"10.1109\/TCSVT.2020.2999384","volume":"31","author":"CC Wei","year":"2021","unstructured":"Wei, C.C., Zhao, J., Zhou, W.G., Li, H.Q.: Semantic boundary detection with reinforcement learning for continuous sign language recognition. IEEE Trans. Circuits Syst. Video Technol. TCSVT 31(3), 1138\u20131149 (2021)","journal-title":"IEEE Trans. Circuits Syst. Video Technol. TCSVT"},{"key":"1330_CR20","doi-asserted-by":"crossref","unstructured":"Zhou, H., Zhou, W.G., Zhou, Y., Li, H.Q.: Spatial-temporal multi-cue network for sign language recognition and translation. IEEE Trans. Multimedia TMM (2021)","DOI":"10.1109\/TMM.2021.3059098"},{"key":"1330_CR21","doi-asserted-by":"crossref","unstructured":"Hu, H.Z., Zhou, W., Li, H.: Hand-model-aware sign language recognition. In: AAAI Conference on Artificial Intelligence (AAAI 2021), pp. 1558\u20131566","DOI":"10.1609\/aaai.v35i2.16247"},{"key":"1330_CR22","doi-asserted-by":"crossref","unstructured":"He, K., Zhang, X., Ren, S., Sun, J.: Deep residual learning for image recognition. In: IEEE Conference on Computer Vision Pattern Recognition. IEEE Computer Society (2016)","DOI":"10.1109\/CVPR.2016.90"},{"key":"1330_CR23","doi-asserted-by":"crossref","unstructured":"Deng, J., Dong, W., Socher, R., Li, L.-J., Li, K., Fei-Fei, et al.: ImageNet: a large-scale hierarchical image database. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2009)","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"1330_CR24","doi-asserted-by":"publisher","unstructured":"Kozlov, A., Andronov, V., Gritsenko, Y.: Lightweight network architecture for real-time action recognition. In: Proceedings of the 35th Annual ACM Symposium on Applied Computing, pp. 2074\u20132080 (2020) https:\/\/doi.org\/10.1145\/3341105.3373906","DOI":"10.1145\/3341105.3373906"},{"key":"1330_CR25","unstructured":"Vaswani, A., Shazeer, N., Parmar, N., et al.: Attention is all you need. Adv. Neural Inf. Process. Syst. pp. 5998\u20136008 (2020)"},{"key":"1330_CR26","doi-asserted-by":"crossref","unstructured":"Papineni, K., Roukos, S., Ward, T., Zhu, W.-J.: Bleu: a method for automatic evaluation of machine translation. In: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics, pp. 311\u2013318 (2002)","DOI":"10.3115\/1073083.1073135"},{"key":"1330_CR27","unstructured":"Bahdanau, D., Cho, K., Bengio, Y.: Neural machine translation by jointly learning to align and translate. In: ICLR (2015)"},{"issue":"1","key":"1330_CR28","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1109\/TSMCA.2006.886347","volume":"37","author":"G Fang","year":"2007","unstructured":"Fang, G., Gao, W., Zhao, D.: Large-vocabulary continuous sign language recognition based on transition-movement models. IEEE Trans. Syst. Man Cybern. 37(1), 1\u20139 (2007)","journal-title":"IEEE Trans. Syst. Man Cybern."},{"key":"1330_CR29","doi-asserted-by":"crossref","unstructured":"Rennie, S.J., Marcheret, E., Mroueh, Y., Ross, J., Goel, V.: Self-critical sequence training for image captioning. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (CVPR 2017)","DOI":"10.1109\/CVPR.2017.131"},{"key":"1330_CR30","doi-asserted-by":"crossref","unstructured":"Huang, J., et al.: Video-based sign language recognition without temporal segmentation. In: The Thirty-Second AAAI Conference on Artificial Intelligence (AAAI-18), pp. 2257\u20132264 (2018)","DOI":"10.1609\/aaai.v32i1.11903"},{"key":"1330_CR31","unstructured":"Wang, L., Xiong, Y., Wang, Z., Qiao, Y., et al.: Temporal segment networks: towards good practices for deep action recognition. IEEE Trans. Pattern Anal. Mach. Intell. (2018)"},{"key":"1330_CR32","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Xu, H., Donahue, J., Rohrbach, M., Mooney, R., Saenko, K.: Translating videos to natural language using deep recurrent neural networks (2014). arXiv preprint arXiv:1412.4729","DOI":"10.3115\/v1\/N15-1173"},{"key":"1330_CR33","doi-asserted-by":"crossref","unstructured":"Venugopalan, S., Rohrbach, M., Donahue, J., Mooney, R., Darrell, T., Saenko, K.: Sequence to sequence-video to text. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4534\u20134542 (2015)","DOI":"10.1109\/ICCV.2015.515"},{"key":"1330_CR34","doi-asserted-by":"crossref","unstructured":"Yao, L., Torabi, A., Cho, K., Ballas, N., Pal, C., Larochelle, H., Courville, A.: Describing videos by exploiting temporal structure. In: Proceedings of the IEEE International Conference on Computer Vision, pp. 4507\u20134515 (2015)","DOI":"10.1109\/ICCV.2015.512"},{"key":"1330_CR35","doi-asserted-by":"crossref","unstructured":"Pan, Y., Mei, T., Yao, T., Li, H., Rui, Y.: Jointly modeling embedding and translation to bridge video and language (2015). arXiv preprint arXiv:1505.01861","DOI":"10.1109\/CVPR.2016.497"},{"key":"1330_CR36","doi-asserted-by":"publisher","first-page":"28","DOI":"10.1016\/j.patrec.2016.03.030","volume":"78","author":"W Yang","year":"2016","unstructured":"Yang, W., Tao, J., Ye, Z.: Continuous sign language recognition using level building based on fast hidden Markov model. Pattern Recognit. Lett. 78, 28\u201335 (2016)","journal-title":"Pattern Recognit. Lett."},{"key":"1330_CR37","doi-asserted-by":"crossref","unstructured":"Zhang, J., Zhou, W., Li, H.: A threshold-based HMM-DTW approach for continuous sign language recognition. In: Proceedings of ACM International Conference on Internet Multimedia Computing and Service, p. 237 (2014)","DOI":"10.1145\/2632856.2632931"}],"container-title":["Machine Vision and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-022-01330-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s00138-022-01330-w\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s00138-022-01330-w.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2022,9,12]],"date-time":"2022-09-12T16:11:19Z","timestamp":1662999079000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s00138-022-01330-w"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2022,8,24]]},"references-count":37,"journal-issue":{"issue":"5","published-print":{"date-parts":[[2022,9]]}},"alternative-id":["1330"],"URL":"https:\/\/doi.org\/10.1007\/s00138-022-01330-w","relation":{},"ISSN":["0932-8092","1432-1769"],"issn-type":[{"type":"print","value":"0932-8092"},{"type":"electronic","value":"1432-1769"}],"subject":[],"published":{"date-parts":[[2022,8,24]]},"assertion":[{"value":"8 September 2021","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"31 May 2022","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 July 2022","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"24 August 2022","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}}],"article-number":"79"}}