{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,6,20]],"date-time":"2026-06-20T16:46:47Z","timestamp":1781974007044,"version":"3.54.5"},"reference-count":49,"publisher":"Springer Science and Business Media LLC","issue":"31","license":[{"start":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T00:00:00Z","timestamp":1741737600000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T00:00:00Z","timestamp":1741737600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"funder":[{"DOI":"10.13039\/501100003968","name":"Iranian National Science Foundation","doi-asserted-by":"crossref","award":["4031030"],"award-info":[{"award-number":["4031030"]}],"id":[{"id":"10.13039\/501100003968","id-type":"DOI","asserted-by":"crossref"}]}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Multimed Tools Appl"],"DOI":"10.1007\/s11042-025-20721-5","type":"journal-article","created":{"date-parts":[[2025,3,12]],"date-time":"2025-03-12T04:13:46Z","timestamp":1741752826000},"page":"38873-38891","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":9,"title":["Continuous sign language recognition using intra-inter gloss attention"],"prefix":"10.1007","volume":"84","author":[{"given":"Hossein","family":"Ranjbar","sequence":"first","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-5826-260X","authenticated-orcid":false,"given":"Alireza","family":"Taheri","sequence":"additional","affiliation":[],"role":[{"vocabulary":"crossref","role":"author"}]}],"member":"297","published-online":{"date-parts":[[2025,3,12]]},"reference":[{"issue":"6","key":"20721_CR1","doi-asserted-by":"publisher","first-page":"873","DOI":"10.1109\/TPAMI.2005.112","volume":"27","author":"SC Ong","year":"2005","unstructured":"Ong SC, Ranganath S (2005) Automatic sign language analysis: a survey and the future beyond lexical meaning. IEEE Trans Pattern Anal Mach Intell 27(6):873\u2013891. https:\/\/doi.org\/10.1109\/TPAMI.2005.112","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"20721_CR2","doi-asserted-by":"crossref","unstructured":"Cheng KL, Yang Z, Chen Q, Tai YW (2020) Fully convolutional networks for continuous sign language recognition. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, Proceedings, Part XXIV 16 (pp 697\u2013714). Springer International Publishing","DOI":"10.1007\/978-3-030-58586-0_41"},{"issue":"7","key":"20721_CR3","doi-asserted-by":"publisher","first-page":"1880","DOI":"10.1109\/TMM.2018.2889563","volume":"21","author":"R Cui","year":"2019","unstructured":"Cui R, Liu Hu, Zhang C (2019) A deep neural framework for continuous sign language recognition by iterative training. IEEE Trans Multimedia 21(7):1880\u20131891","journal-title":"IEEE Trans Multimedia"},{"key":"20721_CR4","doi-asserted-by":"crossref","unstructured":"Niu Z, Mak B (2020) Stochastic fine-grained labeling of multi-state sign glosses for continuous sign language recognition. In: Computer Vision\u2013ECCV 2020: 16th European Conference, Glasgow, Proceedings, Part XVI 16 (pp 172\u2013186). Springer International Publishing","DOI":"10.1007\/978-3-030-58517-4_11"},{"key":"20721_CR5","first-page":"4810","volume":"2022","author":"R Zuo","year":"2022","unstructured":"Zuo R, Mak B (2022) Local context-aware self-attention for continuous sign language recognition}}. Proc Interspeech 2022:4810\u20134814","journal-title":"Proc Interspeech"},{"key":"20721_CR6","doi-asserted-by":"crossref","unstructured":"Pu J, Zhou W, Li H (2019) Iterative alignment network for continuous sign language recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition (pp 4165\u20134174)","DOI":"10.1109\/CVPR.2019.00429"},{"key":"20721_CR7","unstructured":"Li R, Meng L (2022) Multi-view spatial-temporal network for continuous sign language recognition.\u00a0arXiv preprint arXiv:2204.08747"},{"key":"20721_CR8","doi-asserted-by":"crossref","unstructured":"Pu J, Zhou W, Hu H, Li H (2020) Boosting continuous sign language recognition via cross modality augmentation. In: Proceedings of the 28th ACM International Conference on Multimedia (pp 1497\u20131505)","DOI":"10.1145\/3394171.3413931"},{"key":"20721_CR9","doi-asserted-by":"crossref","unstructured":"Slimane FB, Bouguessa M (2021) Context matters: Self-attention for sign language recognition. In: 2020 25th International Conference on Pattern Recognition (ICPR), IEEE, pp 7884\u20137891","DOI":"10.1109\/ICPR48806.2021.9412916"},{"key":"20721_CR10","doi-asserted-by":"crossref","unstructured":"Camgoz NC, Koller O, Hadfield S, Bowden R (2020) Sign language transformers: Joint end-to-end sign language recognition and translation. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition,\u00a0pp 10023\u201310033","DOI":"10.1109\/CVPR42600.2020.01004"},{"key":"20721_CR11","doi-asserted-by":"crossref","unstructured":"Zheng J, Wang Y, Tan C, Li S, Wang G, Xia J., ... Li SZ (2023) Cvt-slr: Contrastive visual-textual transformation for sign language recognition with variational alignment. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 23141\u201323150","DOI":"10.1109\/CVPR52729.2023.02216"},{"issue":"9","key":"20721_CR12","doi-asserted-by":"publisher","first-page":"2306","DOI":"10.1109\/TPAMI.2019.2911077","volume":"42","author":"O Koller","year":"2019","unstructured":"Koller O, Camgoz NC, Ney H, Bowden R (2019) Weakly supervised learning with multi-stream CNN-LSTM-HMMs to discover sequential parallelism in sign language videos. IEEE Trans Pattern Anal Mach Intell 42(9):2306\u20132320","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"20721_CR13","unstructured":"Dosovitskiy A, Beyer L, Kolesnikov A, Weissenborn D, Zhai X, Unterthiner T., ... Houlsby N (2020) An image is worth 16x16 words: transformers for image recognition at scale. arXiv preprint. arXiv:2010.11929"},{"key":"20721_CR14","unstructured":"Vaswani A, Shazeer N, Parmar N, Uszkoreit J, Jones L, Gomez AN., ... Polosukhin I (2017) Attention is all you need. Adv Neural Inf Process Syst 30"},{"key":"20721_CR15","doi-asserted-by":"crossref","unstructured":"Esser P, Rombach R, Ommer B (2021) Taming transformers for high-resolution image synthesis. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition pp 12873\u201312883","DOI":"10.1109\/CVPR46437.2021.01268"},{"key":"20721_CR16","unstructured":"Li Y, Zhang K, Cao J, Timofte R, Van Gool L (2021) Localvit: bringing locality to vision transformers.\u00a0arXiv preprint arXiv:2104.05707"},{"key":"20721_CR17","doi-asserted-by":"publisher","first-page":"108","DOI":"10.1016\/j.cviu.2015.09.013","volume":"141","author":"O Koller","year":"2015","unstructured":"Koller O, Forster J, Ney H (2015) Continuous sign language recognition: Towards large vocabulary statistical recognition systems handling multiple signers. Comput Vis Image Underst 141:108\u2013125","journal-title":"Comput Vis Image Underst"},{"key":"20721_CR18","doi-asserted-by":"crossref","unstructured":"Sandler M, Howard A, Zhu M, Zhmoginov A, Chen LC (2018) Mobilenetv2: inverted residuals and linear bottlenecks. In:\u00a0Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 4510\u20134520","DOI":"10.1109\/CVPR.2018.00474"},{"key":"20721_CR19","doi-asserted-by":"crossref","unstructured":"Dong L, Xu S, Xu B (2018) Speech-transformer: a no-recurrence sequence-to-sequence model for speech recognition. In\u00a02018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, pp 5884\u20135888","DOI":"10.1109\/ICASSP.2018.8462506"},{"key":"20721_CR20","unstructured":"Amodei D, Ananthanarayanan S, Anubhai R, Bai J, Battenberg E, Case C., ... Zhu Z (2016) Deep speech 2: End-to-end speech recognition in english and mandarin. In: International Conference on Machine Learning, PMLR, pp 173\u2013182"},{"issue":"5","key":"20721_CR21","doi-asserted-by":"publisher","first-page":"855","DOI":"10.1109\/TPAMI.2008.137","volume":"31","author":"A Graves","year":"2008","unstructured":"Graves A, Liwicki M, Fern\u00e1ndez S, Bertolami R, Bunke H, Schmidhuber J (2008) A novel connectionist system for unconstrained handwriting recognition. IEEE Trans Pattern Anal Mach Intell 31(5):855\u2013868","journal-title":"IEEE Trans Pattern Anal Mach Intell"},{"key":"20721_CR22","unstructured":"Assael YM, Shillingford B, Whiteson S, De Freitas N (2016) Lipnet: end-to-end sentence-level lipreading.\u00a0arXiv preprint arXiv:1611.01599"},{"key":"20721_CR23","doi-asserted-by":"crossref","unstructured":"Huang DA, Fei-Fei L, Niebles JC (2016)\u00a0Connectionist temporal modeling for weakly supervised action labeling. In Computer Vision\u2013ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11\u201314, 2016, Proceedings, Part IV 14. Springer International Publishing pp 137\u2013153","DOI":"10.1007\/978-3-319-46493-0_9"},{"key":"20721_CR24","doi-asserted-by":"crossref","unstructured":"Cihan Camgoz N, Hadfield S, Koller O, Bowden R (2017) Subunets: end-to-end hand shape and continuous sign language recognition. In: Proceedings of the IEEE International Conference on Computer Vision, pp 3056\u20133065","DOI":"10.1109\/ICCV.2017.332"},{"key":"20721_CR25","unstructured":"Lugaresi C, Tang J, Nash H, McClanahan C, Uboweja E, Hays M., ... Grundmann M (2019) Mediapipe: a framework for building perception pipelines. arXiv preprint arXiv:1906.08172"},{"issue":"1\u20132","key":"20721_CR26","first-page":"21","volume":"5","author":"TY Pan","year":"2018","unstructured":"Pan TY, Lo LY, Yeh CW, Li JW, Liu HT, Hu MC (2018) Sign language recognition in complex background scene based on adaptive skin color modeling and support vector machine. Int J Big Data Intell 5(1\u20132):21\u201330","journal-title":"Int J Big Data Intell"},{"key":"20721_CR27","doi-asserted-by":"crossref","unstructured":"Pugeault N, Bowden R (2011) Spelling it out: real-time ASL fingerspelling recognition. In: 2011 IEEE International Conference on Computer Vision Workshops (ICCV workshops), IEEE, pp 1114\u20131119","DOI":"10.1109\/ICCVW.2011.6130290"},{"key":"20721_CR28","doi-asserted-by":"crossref","unstructured":"Li D, Yu X, Xu C, Petersson L, Li H (2020) Transferring cross-domain knowledge for video sign language recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 6205\u20136214","DOI":"10.1109\/CVPR42600.2020.00624"},{"issue":"11","key":"20721_CR29","doi-asserted-by":"publisher","first-page":"1724","DOI":"10.1093\/comjnl\/bxy049","volume":"61","author":"ZJ Liang","year":"2018","unstructured":"Liang ZJ, Liao SB, Hu BZ (2018) 3D convolutional neural networks for dynamic sign language recognition. Comput J 61(11):1724\u20131736","journal-title":"Comput J"},{"key":"20721_CR30","doi-asserted-by":"crossref","unstructured":"Jiang S, Sun B, Wang L, Bai Y, Li K, Fu Y (2021) Skeleton aware multi-modal sign language recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 3413\u20133423","DOI":"10.1109\/CVPRW53098.2021.00380"},{"key":"20721_CR31","doi-asserted-by":"crossref","unstructured":"Koller O, Zargaran O, Ney H, Bowden R (2016) Deep sign: hybrid CNN-HMM for continuous sign language recognition. In: Proceedings of the British Machine Vision Conference 2016","DOI":"10.5244\/C.30.136"},{"key":"20721_CR32","doi-asserted-by":"crossref","unstructured":"Liu Z, Lin Y, Cao Y, Hu H, Wei Y, Zhang Z., ... Guo B (2021) Swin transformer: hierarchical vision transformer using shifted windows. In: Proceedings of the IEEE\/CVF international conference on computer vision, pp 10012\u201310022","DOI":"10.1109\/ICCV48922.2021.00986"},{"key":"20721_CR33","doi-asserted-by":"crossref","unstructured":"Fan H, Xiong B, Mangalam K, Li Y, Yan Z, Malik J, Feichtenhofer C (2021) Multiscale vision transformers. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 6824\u20136835","DOI":"10.1109\/ICCV48922.2021.00675"},{"issue":"1","key":"20721_CR34","first-page":"854","volume":"37","author":"L Hu","year":"2023","unstructured":"Hu L, Gao L, Liu Z, Feng W (2023) Self-emphasizing network for continuous sign language recognition. Proc AAAI Conf Artif Intell 37(1):854\u2013862","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"20721_CR35","doi-asserted-by":"crossref","unstructured":"Hu L, Gao L, Liu Z, Feng W (2023) Continuous sign language recognition with correlation network. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 2529\u20132539","DOI":"10.1109\/CVPR52729.2023.00249"},{"key":"20721_CR36","doi-asserted-by":"crossref","unstructured":"Zuo R, Mak B (2022) C2slr: Consistency-enhanced continuous sign language recognition. In: Proceedings of the IEEE\/CVF Conference on Computer Vision and Pattern Recognition, pp 5131\u20135140","DOI":"10.1109\/CVPR52688.2022.00507"},{"key":"20721_CR37","doi-asserted-by":"publisher","first-page":"109903","DOI":"10.1016\/j.patcog.2023.109903","volume":"145","author":"L Hu","year":"2024","unstructured":"Hu L, Gao L, Liu Z, Feng W (2024) Scalable frame resolution for efficient continuous sign language recognition. Pattern Recogn 145:109903","journal-title":"Pattern Recogn"},{"key":"20721_CR38","doi-asserted-by":"crossref","unstructured":"Huang Z, Xue W, Zhou Y, Sun J, Wu Y, Yuan T, Chen S (2024) Dual-stage temporal perception network for continuous sign language recognition. Vis Comput 1\u201316","DOI":"10.1007\/s00371-024-03516-x"},{"issue":"07","key":"20721_CR39","first-page":"13009","volume":"34","author":"H Zhou","year":"2020","unstructured":"Zhou H, Zhou W, Zhou Y, Li H (2020) Spatial-temporal multi-cue network for continuous sign language recognition. Proc AAAI Conf Artif Intell 34(07):13009\u201313016","journal-title":"Proc AAAI Conf Artif Intell"},{"key":"20721_CR40","first-page":"492","volume-title":"European Conference on Computer Vision","author":"CL Zhang","year":"2022","unstructured":"Zhang CL, Wu J, Li Y (2022) Actionformer: Localizing moments of actions with transformers. European Conference on Computer Vision. Springer Nature Switzerland, Cham, pp 492\u2013510"},{"issue":"12","key":"20721_CR41","doi-asserted-by":"publisher","first-page":"2213","DOI":"10.1109\/TASLP.2019.2944078","volume":"27","author":"Q Guo","year":"2019","unstructured":"Guo Q, Qiu X, Xue X, Zhang Z (2019) Low-rank and locality constrained self-attention for sequence modeling. IEEE\/ACM Trans Audio Speech Lang Process 27(12):2213\u20132222","journal-title":"IEEE\/ACM Trans Audio Speech Lang Process"},{"key":"20721_CR42","unstructured":"Paszke A, Gross S, Massa F, Lerer A, Bradbury J, Chanan G, ... Chintala S (2019) Pytorch: an imperative style, high-performance deep learning library. Adv Neural Inf Process Syst 32"},{"key":"20721_CR43","unstructured":"Glorot X, Bengio Y (2010) Understanding the difficulty of training deep feedforward neural networks. In\u00a0Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics, JMLR Workshop and Conference Proceedings, pp 249\u2013256"},{"key":"20721_CR44","doi-asserted-by":"crossref","unstructured":"Deng J, Dong W, Socher R, Li LJ, Li K, Fei-Fei L (2009) Imagenet: a large-scale hierarchical image database. In: 2009 IEEE conference on computer vision and pattern recognition, IEEE, pp 248\u2013255","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"20721_CR45","unstructured":"Kingma DP, Ba J (2014) Adam: a method for stochastic optimization.\u00a0arXiv preprint arXiv:1412.6980"},{"key":"20721_CR46","doi-asserted-by":"crossref","unstructured":"He K, Zhang X, Ren S, Sun J (2016) Deep residual learning for image recognition. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, pp 770\u2013778","DOI":"10.1109\/CVPR.2016.90"},{"key":"20721_CR47","doi-asserted-by":"crossref","unstructured":"Min Y, Hao A, Chai X, Chen X (2021) Visual alignment constraint for continuous sign language recognition. In: Proceedings of the IEEE\/CVF International Conference on Computer Vision, pp 11542\u201311551","DOI":"10.1109\/ICCV48922.2021.01111"},{"key":"20721_CR48","first-page":"17043","volume":"35","author":"Y Chen","year":"2022","unstructured":"Chen Y, Zuo R, Wei F, Wu Y, Liu S, Mak B (2022) Two-stream network for sign language recognition and translation. Adv Neural Inf Process Syst 35:17043\u201317056","journal-title":"Adv Neural Inf Process Syst"},{"issue":"7","key":"20721_CR49","first-page":"8","volume":"2","author":"D Guo","year":"2019","unstructured":"Guo D, Wang S, Tian Q, Wang M (2019) Dense temporal convolution network for sign language translation. In IJCAI 2(7):8","journal-title":"In IJCAI"}],"container-title":["Multimedia Tools and Applications"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-025-20721-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/article\/10.1007\/s11042-025-20721-5\/fulltext.html","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/s11042-025-20721-5.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,17]],"date-time":"2025-09-17T09:43:11Z","timestamp":1758102191000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/s11042-025-20721-5"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,3,12]]},"references-count":49,"journal-issue":{"issue":"31","published-online":{"date-parts":[[2025,9]]}},"alternative-id":["20721"],"URL":"https:\/\/doi.org\/10.1007\/s11042-025-20721-5","relation":{},"ISSN":["1573-7721"],"issn-type":[{"value":"1573-7721","type":"electronic"}],"subject":[],"published":{"date-parts":[[2025,3,12]]},"assertion":[{"value":"25 November 2023","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"30 October 2024","order":2,"name":"revised","label":"Revised","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"21 February 2025","order":3,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"12 March 2025","order":4,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Declarations"}},{"value":"There were no tests on humans\/animals in this study. We used one of the worldwide available datasets to conduct this study.","order":2,"name":"Ethics","group":{"name":"EthicsHeading","label":"Ethical approval"}},{"value":"In this study, we did not directly involve any participants and used one of the worldwide available datasets. It seems that the informed consent was previously obtained from all individual participants.","order":3,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent to participate"}},{"value":"We have not used any participants' images in this manuscript.","order":4,"name":"Ethics","group":{"name":"EthicsHeading","label":"Consent for publication"}},{"value":"Author Alireza Taheri has received a research grant from the \u201cIranian National Science Foundation (INSF)\u201d () (Grant No. 4031030)\u201d. The author Hossein Ranjbar declares that he has no conflict of interest.","order":5,"name":"Ethics","group":{"name":"EthicsHeading","label":"Conflict of interest"}}]}}