{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,1]],"date-time":"2026-04-01T11:01:48Z","timestamp":1775041308777,"version":"3.50.1"},"publisher-location":"New York, NY, USA","reference-count":53,"publisher":"ACM","license":[{"start":{"date-parts":[[2024,10,28]],"date-time":"2024-10-28T00:00:00Z","timestamp":1730073600000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/creativecommons.org\/licenses\/by-nd\/4.0\/"}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2024,10,28]]},"DOI":"10.1145\/3664647.3681578","type":"proceedings-article","created":{"date-parts":[[2024,10,26]],"date-time":"2024-10-26T06:59:49Z","timestamp":1729925989000},"page":"9387-9396","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":6,"title":["Siformer: Feature-isolated Transformer for Efficient Skeleton-based Sign Language Recognition"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0000-8597-1938","authenticated-orcid":false,"given":"Muxin","family":"Pu","sequence":"first","affiliation":[{"name":"School of Information Technology, Monash University Malaysia, Subang Jaya, Selangor, Malaysia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0001-8834-9933","authenticated-orcid":false,"given":"Mei Kuan","family":"Lim","sequence":"additional","affiliation":[{"name":"School of Information Technology, Monash University Malaysia, Subang Jaya, Selangor, Malaysia"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-1164-0049","authenticated-orcid":false,"given":"Chun Yong","family":"Chong","sequence":"additional","affiliation":[{"name":"School of Information Technology, Monash University Malaysia, Subang Jaya, Selangor, Malaysia"}]}],"member":"320","published-online":{"date-parts":[[2024,10,28]]},"reference":[{"key":"e_1_3_2_2_1_1","unstructured":"2024. Deafness and hearing loss. https:\/\/www.who.int\/news-room\/fact-sheets\/ detail\/deafness-and-hearing-loss. Accessed: 2024--3--31."},{"key":"e_1_3_2_2_2_1","volume-title":"Proceedings of the IEEE\/CVF winter conference on applications of computer vision. 182--191","author":"Hr\u00faz Marek","year":"2022","unstructured":"Marek Hr\u00faz. 2022. Sign pose-based transformer for wordlevel sign language recognition. In Proceedings of the IEEE\/CVF winter conference on applications of computer vision. 182--191."},{"key":"e_1_3_2_2_3_1","doi-asserted-by":"publisher","DOI":"10.3389\/frobt.2020.594196"},{"key":"e_1_3_2_2_4_1","volume-title":"Proceedings of the IEEE\/CVF winter conference on applications of computer vision. 2735--2744","author":"Cai Jinmiao","year":"2021","unstructured":"Jinmiao Cai, Nianjuan Jiang, Xiaoguang Han, Kui Jia, and Jiangbo Lu. 2021. JOLOGCN: mining joint-centered light-weight information for skeleton-based action recognition. In Proceedings of the IEEE\/CVF winter conference on applications of computer vision. 2735--2744."},{"key":"e_1_3_2_2_5_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR42600.2020.01004"},{"key":"e_1_3_2_2_6_1","volume-title":"Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805","author":"Devlin Jacob","year":"2018","unstructured":"Jacob Devlin, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. 2018. Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805 (2018)."},{"key":"e_1_3_2_2_7_1","volume-title":"Proceedings of the IEEE conference on computer vision and pattern recognition. 1110--1118","author":"Du Yong","year":"2015","unstructured":"Yong Du, Wei Wang, and Liang Wang. 2015. Hierarchical recurrent neural network for skeleton based action recognition. In Proceedings of the IEEE conference on computer vision and pattern recognition. 1110--1118."},{"key":"e_1_3_2_2_8_1","doi-asserted-by":"publisher","DOI":"10.1016\/j.ins.2019.07.070"},{"key":"e_1_3_2_2_9_1","volume-title":"cognition, and the brain: Insights from sign language research","author":"Emmorey Karen","unstructured":"Karen Emmorey. 2001. Language, cognition, and the brain: Insights from sign language research. Psychology Press."},{"key":"e_1_3_2_2_10_1","volume-title":"Reducing transformer depth on demand with structured dropout. arXiv preprint arXiv:1909.11556","author":"Fan Angela","year":"2019","unstructured":"Angela Fan, Edouard Grave, and Armand Joulin. 2019. Reducing transformer depth on demand with structured dropout. arXiv preprint arXiv:1909.11556 (2019)."},{"key":"e_1_3_2_2_11_1","volume-title":"Functional anatomy for physical therapists","author":"Hochschild Jutta","unstructured":"Jutta Hochschild. 2016. Functional anatomy for physical therapists. Georg Thieme Verlag."},{"key":"e_1_3_2_2_12_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV48630.2021.00347"},{"key":"e_1_3_2_2_13_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2023.3269220"},{"key":"e_1_3_2_2_14_1","volume-title":"International Conference on Learning Representations.","author":"Hu Ting-Kuei","year":"2019","unstructured":"Ting-Kuei Hu, Tianlong Chen, HaotaoWang, and ZhangyangWang. 2019. Triple Wins: Boosting Accuracy, Robustness and Efficiency Together by Enabling Input- Adaptive Inference. In International Conference on Learning Representations."},{"key":"e_1_3_2_2_15_1","doi-asserted-by":"publisher","DOI":"10.1109\/TCSVT.2018.2870740"},{"key":"e_1_3_2_2_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.137"},{"key":"e_1_3_2_2_17_1","doi-asserted-by":"publisher","DOI":"10.3389\/frai.2022.759255"},{"key":"e_1_3_2_2_18_1","volume-title":"Australian Sign Language (Auslan): An introduction to sign language linguistics","author":"Johnston Trevor","unstructured":"Trevor Johnston and Adam Schembri. 2007. Australian Sign Language (Auslan): An introduction to sign language linguistics. Cambridge University Press."},{"key":"e_1_3_2_2_19_1","volume-title":"International conference on machine learning. PMLR, 3301--3310","author":"Kaya Yigitcan","year":"2019","unstructured":"Yigitcan Kaya, Sanghyun Hong, and Tudor Dumitras. 2019. Shallow-deep networks: Understanding and mitigating network overthinking. In International conference on machine learning. PMLR, 3301--3310."},{"key":"e_1_3_2_2_20_1","volume-title":"Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907","author":"Kipf Thomas N","year":"2016","unstructured":"Thomas N Kipf and MaxWelling. 2016. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907 (2016)."},{"key":"e_1_3_2_2_21_1","doi-asserted-by":"publisher","DOI":"10.1109\/IST.2018.8577085"},{"key":"e_1_3_2_2_22_1","volume-title":"Sign language recognition based on hand and body skeletal data. In 2018--3DTV-conference: The true vision-capture, transmission and display of 3D video (3DTV-Con)","author":"Konstantinidis Dimitrios","unstructured":"Dimitrios Konstantinidis, Kosmas Dimitropoulos, and Petros Daras. 2018. Sign language recognition based on hand and body skeletal data. In 2018--3DTV-conference: The true vision-capture, transmission and display of 3D video (3DTV-Con). IEEE, 1--4."},{"key":"e_1_3_2_2_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACV45572.2020.9093512"},{"key":"e_1_3_2_2_24_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00371"},{"key":"e_1_3_2_2_25_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00572"},{"key":"e_1_3_2_2_26_1","volume-title":"Proceedings, Part III 14","author":"Liu Jun","year":"2016","unstructured":"Jun Liu, Amir Shahroudy, Dong Xu, and Gang Wang. 2016. Spatio-temporal lstm with trust gates for 3d human action recognition. In Computer Vision--ECCV 2016: 14th European Conference, Amsterdam, The Netherlands, October 11--14, 2016, Proceedings, Part III 14. Springer, 816--833."},{"key":"e_1_3_2_2_27_1","volume-title":"Are sixteen heads really better than one? Advances in neural information processing systems 32","author":"Michel Paul","year":"2019","unstructured":"Paul Michel, Omer Levy, and Graham Neubig. 2019. Are sixteen heads really better than one? Advances in neural information processing systems 32 (2019)."},{"key":"e_1_3_2_2_28_1","unstructured":"Anna Mindess. 2014. Reading between the signs: Intercultural communication for sign language interpreters. Nicholas Brealey."},{"key":"e_1_3_2_2_29_1","doi-asserted-by":"publisher","DOI":"10.1109\/MET59151.2023.00012"},{"key":"e_1_3_2_2_30_1","volume-title":"LSA64: A Dataset of Argentinian Sign Language. XX II Congreso Argentino de Ciencias de la Computaci\u00f3n (CACIC)","author":"Ronchetti Franco","year":"2016","unstructured":"Franco Ronchetti, Facundo Quiroga, Cesar Estrebou, Laura Lanzarini, and Alejandro Rosete. 2016. LSA64: A Dataset of Argentinian Sign Language. XX II Congreso Argentino de Ciencias de la Computaci\u00f3n (CACIC) (2016)."},{"key":"e_1_3_2_2_31_1","volume-title":"Thieme Atlas of Anatomy: General Anatomy and Musculoskeletal System","author":"Ross Lawrence M","unstructured":"Lawrence M Ross, Edward D Lamperti, Michael Sch\u00fcnke, Erik Schulte, Udo Schumacher, Markus Voll, and Karl Wesker. 2006. Thieme Atlas of Anatomy: General Anatomy and Musculoskeletal System. Georg Thieme Verlag."},{"key":"e_1_3_2_2_32_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV.2017.48"},{"key":"e_1_3_2_2_33_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_2_34_1","unstructured":"Jai Amrish Shah et al. 2018. Deepsign: A deep-learning architecture for sign language. Ph.D. Dissertation."},{"key":"e_1_3_2_2_35_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v31i1.11212"},{"key":"e_1_3_2_2_36_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1\/D19-1441"},{"key":"e_1_3_2_2_37_1","volume-title":"International conference on machine learning. PMLR, 6105--6114","author":"Tan Mingxing","year":"2019","unstructured":"Mingxing Tan and Quoc Le. 2019. Efficientnet: Rethinking model scaling for convolutional neural networks. In International conference on machine learning. PMLR, 6105--6114."},{"key":"e_1_3_2_2_38_1","doi-asserted-by":"publisher","DOI":"10.1109\/TPAMI.2016.2599170"},{"key":"e_1_3_2_2_39_1","doi-asserted-by":"publisher","DOI":"10.1145\/2629500"},{"key":"e_1_3_2_2_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/WACVW52041.2021.00008"},{"key":"e_1_3_2_2_41_1","volume-title":"Linguistics of American sign language: An introduction","author":"Valli Clayton","unstructured":"Clayton Valli and Ceil Lucas. 2000. Linguistics of American sign language: An introduction. Gallaudet University Press."},{"key":"e_1_3_2_2_42_1","volume-title":"Attention is all you need. Advances in neural information processing systems 30","author":"Vaswani Ashish","year":"2017","unstructured":"Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. Advances in neural information processing systems 30 (2017)."},{"key":"e_1_3_2_2_43_1","volume-title":"Analyzing multi-head self-attention: Specialized heads do the heavy lifting, the rest can be pruned. arXiv preprint arXiv:1905.09418","author":"Voita Elena","year":"2019","unstructured":"Elena Voita, David Talbot, Fedor Moiseev, Rico Sennrich, and Ivan Titov. 2019. Analyzing multi-head self-attention: Specialized heads do the heavy lifting, the rest can be pruned. arXiv preprint arXiv:1905.09418 (2019)."},{"key":"e_1_3_2_2_44_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-030-01261-8_25"},{"key":"e_1_3_2_2_45_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v37i3.25378"},{"key":"e_1_3_2_2_46_1","doi-asserted-by":"publisher","DOI":"10.18653\/v1"},{"key":"e_1_3_2_2_47_1","volume-title":"Bertof- theseus: Compressing bert by progressive module replacing. arXiv preprint arXiv:2002.02925","author":"Xu Canwen","year":"2020","unstructured":"Canwen Xu, Wangchunshu Zhou, Tao Ge, Furu Wei, and Ming Zhou. 2020. Bertof- theseus: Compressing bert by progressive module replacing. arXiv preprint arXiv:2002.02925 (2020)."},{"key":"e_1_3_2_2_48_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICIEA.2010.5514688"},{"key":"e_1_3_2_2_49_1","volume-title":"Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 4866--4874","author":"Yuan Shanxin","year":"2017","unstructured":"Shanxin Yuan, Qi Ye, Bjorn Stenger, Siddhant Jain, and Tae-Kyun Kim. 2017. Bighand2. 2m benchmark: Hand pose dataset and state of the art analysis. In Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 4866--4874."},{"key":"e_1_3_2_2_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW.2018.00232"},{"key":"e_1_3_2_2_51_1","doi-asserted-by":"publisher","DOI":"10.3390\/fi11040091"},{"key":"e_1_3_2_2_52_1","doi-asserted-by":"publisher","DOI":"10.1609\/aaai.v35i12.17325"},{"key":"e_1_3_2_2_53_1","first-page":"18330","article-title":"Bert loses patience: Fast and robust inference with early exit","volume":"33","author":"Zhou Wangchunshu","year":"2020","unstructured":"Wangchunshu Zhou, Canwen Xu, Tao Ge, Julian McAuley, Ke Xu, and Furu Wei. 2020. Bert loses patience: Fast and robust inference with early exit. Advances in Neural Information Processing Systems 33 (2020), 18330--18341.","journal-title":"Advances in Neural Information Processing Systems"}],"event":{"name":"MM '24: The 32nd ACM International Conference on Multimedia","location":"Melbourne VIC Australia","acronym":"MM '24","sponsor":["SIGMM ACM Special Interest Group on Multimedia"]},"container-title":["Proceedings of the 32nd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681578","content-type":"unspecified","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3664647.3681578","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,6,19]],"date-time":"2025-06-19T01:17:48Z","timestamp":1750295868000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3664647.3681578"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2024,10,28]]},"references-count":53,"alternative-id":["10.1145\/3664647.3681578","10.1145\/3664647"],"URL":"https:\/\/doi.org\/10.1145\/3664647.3681578","relation":{},"subject":[],"published":{"date-parts":[[2024,10,28]]},"assertion":[{"value":"2024-10-28","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}